コード例 #1
0
 /// <summary>
 /// Creates the extractor, categorizer, ranker and filter used by this processor
 /// and stores the frontier queue and task id taken from the arguments.
 /// </summary>
 /// <param name="initializer">Supplies the category list, the filter constraints and the task id.</param>
 /// <param name="frontier">URL queue kept by this processor.</param>
 public HtmlPageCategorizationProcessor(Initializer initializer, Queue<Url> frontier)
 {
     extractor = new Extractor();

     // The ranker is constructed from the categorizer, so the categorizer is built first.
     var categories = initializer.getCategoryList();
     categorizer = new Categorizer(categories);
     ranker = new Ranker(categorizer);

     var constraints = initializer.getContraints();
     filter = new Filter("http://", constraints);

     queueFronier = frontier;
     taskId = initializer.getTaskId();
 }
コード例 #2
0
        /// <summary>
        /// Verifies that the extractor returns the text of the embedded
        /// <c>sample.pdf</c> resource.
        /// </summary>
        public void Can_extract_text()
        {
            ////Arrange
            string result;

            // The resource stream is opened by this test, so it is disposed here as well.
            using (var pdfStream = GetType().Assembly.GetManifestResourceStream(typeof (ExtractorTests), "sample.pdf"))
            {
                // GetManifestResourceStream returns null when the resource is not found;
                // fail with a clear message instead of an error inside the extractor.
                Assert.That(pdfStream, Is.Not.Null, "Embedded resource sample.pdf was not found");

                ////Act
                using (var extractor = new Extractor())
                {
                    result = extractor.ExtractToString(pdfStream);
                }
            }

            ////Assert
            Assert.That(result.Trim(), Is.EqualTo("hello world"));
        }
コード例 #3
0
        /// <summary>
        /// Runs the extractor on the spec input and checks that the number of results,
        /// and the type and text of each result, match the expected results of the spec.
        /// </summary>
        public void TestDateTimeExtractor()
        {
            TestPreValidation();

            var referenceDateTime = TestSpec.GetReferenceDateTime();

            var actualResults   = Extractor.Extract(TestSpec.Input, referenceDateTime);
            var expectedResults = TestSpec.CastResults <ExtractResult>();

            Assert.AreEqual(expectedResults.Count(), actualResults.Count, GetMessage(TestSpec));

            // Pair the results positionally; the count assertion above guarantees equal lengths.
            var pairs = expectedResults.Zip(actualResults, (expected, actual) => new { expected, actual });
            foreach (var pair in pairs)
            {
                Assert.AreEqual(pair.expected.Type, pair.actual.Type, GetMessage(TestSpec));
                Assert.AreEqual(pair.expected.Text, pair.actual.Text, GetMessage(TestSpec));
            }
        }
コード例 #4
0
        /// <summary>
        /// HTTP GET function: authenticates an extractor with the caller's bearer token,
        /// loads it and returns its subscriptions.
        /// </summary>
        /// <param name="req">Incoming request; the token is read from its <c>Authorization</c> header.</param>
        /// <param name="log">Logger for this invocation.</param>
        /// <returns>
        /// 401 ("Invalid Token") when the header is missing or carries no token;
        /// otherwise 200 with the extractor's subscriptions.
        /// </returns>
        public static async Task <IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "get", Route = null)] HttpRequest req,
            ILogger log)
        {
            log.LogInformation("SubscriptionsFunction started");

            // TryGetValue instead of Single(): a request without the header must produce
            // 401, not an InvalidOperationException surfacing as HTTP 500.
            if (!req.Headers.TryGetValue("Authorization", out var authHeader) ||
                string.IsNullOrWhiteSpace(authHeader.ToString()))
            {
                return(new UnauthorizedObjectResult("Invalid Token"));                                        // return HTTP 401 Unauthorized
            }

            // Strip only a leading scheme name; Replace() would also remove the text
            // "Bearer" if it happened to occur inside the token itself.
            const string scheme = "Bearer";
            var token = authHeader.ToString().Trim();
            if (token.StartsWith(scheme, System.StringComparison.OrdinalIgnoreCase))
            {
                token = token.Substring(scheme.Length).Trim();
            }

            if (token.Length == 0)
            {
                return(new UnauthorizedObjectResult("Invalid Token"));
            }

            // Use a local instance for this request. The shared static member is still
            // assigned (other code may read it), but a concurrent invocation replacing it
            // can no longer swap the extractor between Authenticate and Load.
            var extractor = new Extractor();
            Extractor = extractor;
            extractor.Authenticate(token);
            await extractor.Load();

            return(new OkObjectResult(extractor.Subscriptions));
        }
コード例 #5
0
        /// <summary>
        /// Extracts candidates from <paramref name="query"/>, parses each of them and
        /// converts the parse results into model results.
        /// </summary>
        /// <param name="query">Text to analyze.</param>
        /// <returns>One <c>ModelResult</c> per extracted candidate, in extraction order.</returns>
        public List <ModelResult> Parse(string query)
        {
            var extracted = Extractor.Extract(query);

            // Phase 1: parse every extraction result.
            var parsed = new List <ParseResult>();
            foreach (var candidate in extracted)
            {
                var parseResult = Parser.Parse(candidate);
                parsed.Add(parseResult);
            }

            // Phase 2: project the parse results onto the public model type.
            var models = new List <ModelResult>();
            foreach (var o in parsed)
            {
                var model = new ModelResult
                {
                    Start = o.Start.Value,
                    // End is inclusive, hence the -1.
                    End = o.Start.Value + o.Length.Value - 1,
                    Resolution = new SortedDictionary <string, object> {
                        { "value", o.ResolutionStr }
                    },
                    Text = o.Text,
                    TypeName = ModelTypeName
                };
                models.Add(model);
            }

            return(models);
        }
コード例 #6
0
ファイル: Form1.cs プロジェクト: dgtorreso/Msdn
 /// <summary>
 /// Initializes the form and wires the button so that a click builds an
 /// <c>Extractor</c> for the URL typed into the text box, with the form disabled
 /// while that happens.
 /// </summary>
 public Form1()
 {
     InitializeComponent();
     button1.Click += (sender, args) =>
     {
         try
         {
             Enabled = false;

             // Uri parsing and the Extractor constructor may both throw; either
             // failure ends up in the catch block below.
             var target = new Uri(textBox1.Text);
             new Extractor(target);

             MessageBox.Show("Terminó");
             Enabled = true;
         }
         catch (Exception)
         {
             // Re-enable the form before telling the user about the failure.
             Enabled = true;
             MessageBox.Show("Ocurrió un error, intente de nuevo");
         }
     };
 }
コード例 #7
0
        /// <summary>
        /// Parses the page: extracts the model's data, passes every extracted item to
        /// each data handler and stores the items on the page under the model identity.
        /// </summary>
        /// <param name="page">Page data.</param>
        protected override void Handle(Page page)
        {
            var datas = Extractor.Extract(page, Model);

            if (datas == null)
            {
                return;
            }

            // Materialize once. Count()/ElementAt(i) on the raw sequence re-enumerate it on
            // every call (quadratic work), and if the sequence is lazily produced the
            // handlers would otherwise see different objects than the ones stored below.
            var items = datas.ToList();
            if (items.Count == 0)
            {
                return;
            }

            foreach (var handler in _dataHandlers)
            {
                for (int i = 0; i < items.Count; ++i)
                {
                    // NOTE(review): the handler receives the item by ref through a local;
                    // a handler that reassigns the reference does not change the stored item.
                    dynamic data = items[i];
                    handler.Handle(ref data, page);
                }
            }

            page.AddResultItem(Model.Identity, new Tuple <IModel, IEnumerable <dynamic> >(Model, items));
        }
コード例 #8
0
 /// <summary>
 /// Opens <paramref name="item"/>, renders each of its pages onto an output canvas
 /// (device <c>IGR_DEVICE_IMAGE_TIF</c>) backed by an in-memory stream, then copies
 /// the buffered bytes to <paramref name="outFile"/>.
 /// </summary>
 /// <remarks>
 /// Exceptions raised while opening or rendering are written to <c>m_stderr</c> and
 /// not rethrown. The item is closed in all cases.
 /// </remarks>
 /// <param name="filename">Name used in the progress and error messages.</param>
 /// <param name="outFile">Already-open writer that receives the rendered output.</param>
 /// <param name="item">Document to render.</param>
 private void ProcessFile(string filename, BinaryWriter outFile, Extractor item)
 {
     m_stderr.WriteLine("Processing " + filename);
     try
     {
         item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

         var buffer = new CustomStream(new MemoryStream());
         Hyland.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(buffer, isys_docfilters.IGR_DEVICE_IMAGE_TIF, "");
         try
         {
             // The page count is re-read on every iteration, exactly as the loop condition requires.
             int pageIndex = 0;
             while (pageIndex < item.GetPageCount())
             {
                 Hyland.DocumentFilters.Page page = item.GetPage(pageIndex);
                 try
                 {
                     canvas.RenderPage(page);
                 }
                 finally
                 {
                     page.Close();
                 }

                 pageIndex++;
             }
         }
         finally
         {
             canvas.Close();

             // Write in-memory stream contents to existing open file (e.g. stdout).
             // This runs even when rendering failed part-way.
             long bytesWritten = buffer.writeTo(outFile);
             m_stderr.WriteLine(bytesWritten + " bytes written to outFile");
         }
     }
     catch (Exception error)
     {
         m_stderr.WriteLine("Error Processing " + filename);
         m_stderr.WriteLine("   - " + error.ToString());
     }
     finally
     {
         item.Close();
     }
 }
コード例 #9
0
ファイル: TestBase.cs プロジェクト: buptqq/Recognizers-Text-1
        /// <summary>
        /// Extracts and parses the spec input, then checks the number of results and,
        /// for results whose value is a dictionary, compares the "values" entries
        /// against the expected results of the spec.
        /// </summary>
        public void TestDateTimeMergedParser()
        {
            if (TestUtils.EvaluateSpec(TestSpec, out string message))
            {
                Assert.Inconclusive(message);
            }

            if (Debugger.IsAttached && TestSpec.Debug)
            {
                Debugger.Break();
            }

            var referenceDateTime = TestSpec.GetReferenceDateTime();

            var extractResults = Extractor.Extract(TestSpec.Input, referenceDateTime);
            var actualResults  = extractResults.Select(o => DateTimeParser.Parse(o, referenceDateTime)).ToArray();

            var expectedResults = TestSpec.CastResults <DateTimeParseResult>();

            // actualResults is an array, so use Length rather than the LINQ Count().
            Assert.AreEqual(expectedResults.Count(), actualResults.Length, GetMessage(TestSpec));

            foreach (var tuple in Enumerable.Zip(expectedResults, actualResults, Tuple.Create))
            {
                var expected = tuple.Item1;
                var actual   = tuple.Item2;

                if (actual.Value is IDictionary <string, object> values)
                {
                    var actualValues = values["values"] as IList <Dictionary <string, string> >;

                    // A failed cast would otherwise surface as an ArgumentNullException inside Zip.
                    Assert.IsNotNull(actualValues, GetMessage(TestSpec));

                    var expectedObj    = JsonConvert.DeserializeObject <IDictionary <string, IList <Dictionary <string, string> > > >(expected.Value.ToString());
                    var expectedValues = expectedObj["values"];

                    // Zip stops at the shorter sequence, so missing or extra values would go
                    // unnoticed without an explicit length check.
                    Assert.AreEqual(expectedValues.Count, actualValues.Count, GetMessage(TestSpec));

                    foreach (var results in Enumerable.Zip(expectedValues, actualValues, Tuple.Create))
                    {
                        CollectionAssert.AreEqual(results.Item1, results.Item2, GetMessage(TestSpec));
                    }
                }
            }
        }
コード例 #10
0
        /// <summary>
        /// Extracts and parses the spec input, then compares text, type, start, length
        /// and (for dictionary values) the value-set entries of every result against
        /// the expected results of the spec.
        /// </summary>
        public void TestDateTimeMergedParser()
        {
            TestPreValidation();

            var referenceDateTime = TestSpec.GetReferenceDateTime();

            var extractResults = Extractor.Extract(TestSpec.Input, referenceDateTime);
            var actualResults  = extractResults.Select(o => DateTimeParser.Parse(o, referenceDateTime)).ToArray();

            var expectedResults = TestSpec.CastResults <DateTimeParseResult>();

            Assert.AreEqual(expectedResults.Count(), actualResults.Length, GetMessage(TestSpec));

            foreach (var tuple in Enumerable.Zip(expectedResults, actualResults, Tuple.Create))
            {
                var expected = tuple.Item1;
                var actual   = tuple.Item2;

                Assert.AreEqual(expected.Text, actual.Text, GetMessage(TestSpec));
                Assert.AreEqual(expected.Type, actual.Type, GetMessage(TestSpec));
                Assert.AreEqual(expected.Start, actual.Start, GetMessage(TestSpec));
                Assert.AreEqual(expected.Length, actual.Length, GetMessage(TestSpec));

                if (actual.Value is IDictionary <string, object> values)
                {
                    // Actual ValueSet types should not be modified as that's considered a breaking API change
                    var actualValues = values[ResolutionKey.ValueSet] as IList <Dictionary <string, string> >;

                    // A failed cast would otherwise surface as an ArgumentNullException inside Zip.
                    Assert.IsNotNull(actualValues, GetMessage(TestSpec));

                    var expectedObj =
                        JsonConvert.DeserializeObject <IDictionary <string, IList <Dictionary <string, string> > > >(
                            expected.Value.ToString());
                    var expectedValues = expectedObj[ResolutionKey.ValueSet];

                    // Zip stops at the shorter sequence, so missing or extra value sets would
                    // go unnoticed without an explicit length check.
                    Assert.AreEqual(expectedValues.Count, actualValues.Count, GetMessage(TestSpec));

                    foreach (var value in expectedValues.Zip(actualValues, Tuple.Create))
                    {
                        Assert.AreEqual(value.Item1.Count, value.Item2.Count, GetMessage(TestSpec));
                        CollectionAssert.AreEqual(value.Item1, value.Item2, GetMessage(TestSpec));
                    }
                }
            }
        }
コード例 #11
0
        /// <summary>
        /// Extracts every entry of the test file into a directory named after the file,
        /// printing read/decompress progress counters to the console.
        /// </summary>
        public async Task TestExtractorExtractAllAsync()
        {
            var file       = new DirectoryFile(TestFile);
            var targetName = Path.GetFileNameWithoutExtension(file.FilePath) + Path.DirectorySeparatorChar;
            var fullDir    = Path.Combine(ExtractPath, targetName);

            Directory.CreateDirectory(fullDir);

            // Totals used as the denominators of the progress output.
            int totalToRead       = file.EntryBlocks.Sum(block => block.Entries.Count);
            int totalToDecompress = file.EntryBlocks.Sum(block => block.Entries.Count(entry => entry.IsCompressed));

            int    readSoFar         = 0;
            int    decompressedSoFar = 0;
            object gate              = new object();

            // Progress callbacks are serialized through the gate so the counters stay consistent.
            var prog = new Progress <EntryOperation>(operation =>
            {
                lock (gate)
                {
                    if (operation.OperationPerformed == EntryOperation.ProcessType.Read)
                    {
                        readSoFar++;
                        Console.WriteLine($"Read: {readSoFar}/{totalToRead}");
                    }
                    else if (operation.OperationPerformed == EntryOperation.ProcessType.Decompress)
                    {
                        decompressedSoFar++;
                        Console.WriteLine($"Decompressed: {decompressedSoFar}/{totalToDecompress}");
                    }
                }
            });

            using var extractor = new Extractor(file);
            await extractor.ExtractAllAsync(fullDir, prog);
        }
コード例 #12
0
        /// <summary>
        /// Ensures that <paramref name="e"/> has a label: an already valid label is kept,
        /// a label recorded in <c>ids</c> is reused, and otherwise a new label is
        /// created, defined and recorded, and extraction of the entity's contents is
        /// queued via <c>PopulateLater</c>.
        /// </summary>
        /// <param name="e">Entity to label.</param>
        /// <returns>The same entity that was passed in.</returns>
        internal T Populate <T>(T e) where T : IExtractedEntity
        {
            // Already populated: nothing to do.
            if (e.Label.Valid)
            {
                return(e);
            }

            // Known entity: reuse the label recorded for it.
            if (ids.TryGetValue(e, out var knownLabel))
            {
                e.Label = knownLabel;
                return(e);
            }

            // First sighting: allocate and register a label, then defer the contents.
            e.Label = GetNewLabel();
            DefineLabel(e);
            ids.Add(e, e.Label);
            PopulateLater(() =>
            {
                foreach (var content in e.Contents)
                {
                    content.Extract(this);
                }
            });
#if DEBUG_LABELS
            using var writer = new EscapingTextWriter();
            e.WriteId(writer);
            var id = writer.ToString();

            // Two different entities producing the same written id are reported as a warning.
            if (debugLabels.TryGetValue(id, out var previousEntity))
            {
                Extractor.Message(new Message("Duplicate trap ID", id, null, severity: Util.Logging.Severity.Warning));
            }
            else
            {
                debugLabels.Add(id, e);
            }
#endif
            return(e);
        }
コード例 #13
0
        /// <summary>
        /// Loads the procedures of <paramref name="db"/> and, for each of them, its text
        /// plus (when requested) its input parameters and output record sets.
        /// </summary>
        /// <param name="db">Database whose procedures are read.</param>
        /// <param name="dataToExtract">
        /// Requested object types. <c>All</c> enables both optional parts; a null list
        /// is treated as "no optional parts requested".
        /// </param>
        private void ReadProcedures(Database db, List <DBObjectType> dataToExtract)
        {
            Extractor.GetProcedures(db);

            // The request flags do not depend on the procedure, so evaluate them once
            // instead of scanning the list twice or more for every procedure.
            bool extractAll           = dataToExtract != null && dataToExtract.Contains(DBObjectType.All);
            bool readInputParameters  = extractAll || (dataToExtract != null && dataToExtract.Contains(DBObjectType.ProcedureInputParameters));
            bool readOutputRecordSets = extractAll || (dataToExtract != null && dataToExtract.Contains(DBObjectType.ProcedureOutputRecordSets));

            foreach (Procedure p in db.Procedures)
            {
                // Copy the delegate first so a subscriber removed between the null check
                // and the call cannot cause a NullReferenceException.
                var beforeExtract = BeforeExtractData;
                if (beforeExtract != null)
                {
                    beforeExtract(DBObjectType.Procedures, p.Name);
                }

                Extractor.GetProcedureText(db, p);
                if (readInputParameters)
                {
                    Extractor.GetProcedureInputParameters(db, p);
                }
                if (readOutputRecordSets)
                {
                    Extractor.GetProcedureOutputRecordSets(db, p);
                }
            }
        }
コード例 #14
0
ファイル: Form1.cs プロジェクト: SCRUG/PDFcurdler
        /// <summary>
        /// Lets the user pick one or more files, shows how many were selected, creates
        /// the extractor for them and lists them; warns when nothing was chosen.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // OpenFileDialog is a disposable component; release it when the handler ends.
            using (OpenFileDialog openFileDialog1 = new OpenFileDialog())
            {
                openFileDialog1.Multiselect = true;
                DialogResult result = openFileDialog1.ShowDialog(); // Show the dialog.

                if (result == DialogResult.OK)                      // Test result.
                {
                    string[]      filesArray = openFileDialog1.FileNames;
                    List <string> filesList  = new List <string>(filesArray);
                    this.label1.Text = String.Format("Seleccionados: {0}", filesList.Count);
                    this._extractor  = new Extractor(filesList);

                    this.ShowFilesInList(filesList);
                }
                else
                {
                    MessageBox.Show("Debe seleccionar algun archivo PDF.");
                }
            }
        }
コード例 #15
0
//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
//ORIGINAL LINE: @Test void shouldCloneExtractor()
        /// <summary>
        /// Checks that a cloned extractor holds its own value: extracting with the
        /// original leaves the clone empty, and extracting with the clone afterwards
        /// leaves the original's value untouched.
        /// </summary>
        internal virtual void ShouldCloneExtractor()
        {
            // GIVEN
            Extractors         extractors = new Extractors(';');
            Extractor <string> original   = extractors.String();
            Extractor <string> clone      = original.Clone();

            // WHEN
            string firstValue = "abc";
            char[] firstChars = firstValue.ToCharArray();

            original.Extract(firstChars, 0, firstValue.Length, false);
            assertEquals(firstValue, original.Value());
            assertNull(clone.Value());

            // THEN
            string secondValue = "def";
            char[] secondChars = secondValue.ToCharArray();

            clone.Extract(secondChars, 0, secondValue.Length, false);
            assertEquals(secondValue, clone.Value());
            assertEquals(firstValue, original.Value());
        }
コード例 #16
0
 /// <summary>
 /// Incorporate one aggregatable value into the result.
 /// </summary>
 /// <remarks>
 /// Only partial results (<paramref name="isFinal"/> is <b>true</b>) are accepted.
 /// A partial result that is a <see cref="DictionaryEntry"/> is added to the
 /// result map under its key, with its value passed through the extractor.
 /// A <c>null</c> object is ignored.
 /// </remarks>
 /// <param name="o">
 /// The value to incorporate into the aggregated result.
 /// </param>
 /// <param name="isFinal">
 /// <b>true</b> to indicate that the given object is a partial
 /// result returned by a parallel aggregator.
 /// </param>
 /// <exception cref="InvalidOperationException">
 /// A non-null object was passed with <paramref name="isFinal"/> set to <b>false</b>.
 /// </exception>
 protected override void Process(object o, bool isFinal)
 {
     if (o == null)
     {
         return;
     }

     if (!isFinal)
     {
         // should not be called with isFinal == false
         // that would mean multiple aggregators would be running on the client,
         // but shouldn't happen.
         throw new InvalidOperationException();
     }

     // Objects of any other type are silently ignored.
     if (o is DictionaryEntry)
     {
         var entry = (DictionaryEntry) o;
         EnsureMap().Add(entry.Key, Extractor.Extract(entry.Value));
     }
 }
コード例 #17
0
        /// <summary>
        /// For every schema of <paramref name="database"/>, reads the schema's indices
        /// through <paramref name="extractor"/> and attaches each index to its table,
        /// provided the table is known and every indexed column exists on it.
        /// </summary>
        /// <param name="extractor">Provides access to the index metadata.</param>
        /// <param name="database">Database model whose tables receive the indices.</param>
        private static void AddIndices(Extractor extractor, Database database)
        {
            foreach (var schema in database.Schemas)
            {
                // One lookup key per schema; only the table name changes per index.
                var tableID = new TableID(schema.DatabaseName, schema.SchemaName, null);

                foreach (var index in extractor.Database.GetIndices(schema.SchemaName))
                {
                    tableID.TableName = index.TableName;
                    if (database.tableDictionary.ContainsKey(tableID))
                    {
                        var table = database.tableDictionary[tableID];

                        // NOTE(review): the index object is disposed when this using block
                        // exits even when it has been added to the table - confirm that
                        // Index.Dispose leaves the object usable.
                        using (var ix = new Index(table, index.IndexName))
                        {
                            var orderedColumnNames = index.IndexOrderdColumnNames
                                                          .OrderBy(entry => entry.Key)
                                                          .Select(entry => entry.Value);

                            var allColumnsFound = true;
                            foreach (var columnName in orderedColumnNames)
                            {
                                var column = table.Columns.FirstOrDefault(col => col.ColumnName == columnName);
                                if (column != null)
                                {
                                    ix._columns.Add(column);
                                    continue;
                                }

                                // An index referring to an unknown column is not added.
                                allColumnsFound = false;
                                break;
                            }

                            ix.IsUnique = index.IsUnique;
                            if (allColumnsFound)
                            {
                                table._indices.Add(ix);
                            }
                        }
                    }
                }
            }
        }
コード例 #18
0
        /// <summary>
        /// Initializes the main form: resolves the application folder, loads the
        /// configuration (prompting with the configuration dialog until it can be
        /// saved, or exiting when the dialog asks to close), then creates the extractor
        /// and the databases and refreshes the combo box.
        /// </summary>
        public MainForm()
        {
            InitializeComponent();

            // Take the folder of the running executable directly. Splitting the path on a
            // hard-coded, case-sensitive file name yields the full exe path (not the
            // folder) as soon as the executable is renamed or launched with other casing.
            AppPath = System.IO.Path.GetDirectoryName(Application.ExecutablePath);                 //appPath

            Configs = new Configurations(AppPath); //Initialize configurations
            if (!Configs.getConfigurations())
            {
                ConfigurationsForm configForm = new ConfigurationsForm();
                try
                {
                    configForm.ShowDialog();
                    if (configForm.ShouldClose)
                    {
                        Environment.Exit(0);
                    }

                    while (!Configs.updateConfigurations(configForm.Cfg))
                    {
                        // Read the rejected settings before disposing the dialog that owns them.
                        var rejectedCfg = configForm.Cfg;
                        configForm.Dispose();

                        configForm = new ConfigurationsForm(rejectedCfg);
                        configForm.ShowDialog();
                        if (configForm.ShouldClose)
                        {
                            Environment.Exit(0);
                        }
                    }
                }
                finally
                {
                    // The last dialog instance was previously never disposed.
                    configForm.Dispose();
                }
            }

            outputLabelData.Text = Configs.OutPath;
            osuLabelData.Text    = Configs.OsuPath;

            extractor = new Extractor(this.progressBar, this.extractButton, this.optionsButton, this.cancelButton);

            Cdb = new CollectionDb();
            Odb = new OsuDb();

            updateComboBox(false);
        }
コード例 #19
0
        /// <summary>
        /// Checks whether the client that runs <paramref name="job"/> still answers by
        /// connecting to its host and listening port, retrying once per second.
        /// </summary>
        /// <param name="job">Job whose host name and listening port are probed.</param>
        /// <param name="attempts">Maximum number of connection attempts.</param>
        /// <param name="unlockOnFail">
        /// When true and every attempt failed, the job's site/file lock is released.
        /// </param>
        /// <returns>True as soon as one connection succeeds; false otherwise.</returns>
        public bool tryClientConnect(Extractor job, int attempts, bool unlockOnFail)
        {
            Client c = new Client();

            _report.addDebug("Attempting to connect to a client to verify a task is still running...");
            for (int i = 0; i < attempts; i++)
            {
                try
                {
                    c.connect(job.HostName, job.ListeningPort);
                    c.disconnect();
                    _report.addDebug("The client appears to still be running!");
                    return(true);
                }
                catch (Exception)
                {
                    c.disconnect();

                    // Retry only while attempts remain. The previous test (i < attempts) was
                    // always true inside this loop, so the unlock below was unreachable and
                    // the final failure still slept and announced "0 more times".
                    bool attemptsRemain = i < attempts - 1;
                    if (attemptsRemain)
                    {
                        _report.addDebug("The client did not respond on attempt number " + (i + 1) + ", will try " + (attempts - i - 1) + " more times");
                        System.Threading.Thread.Sleep(1000);
                        continue;
                    }
                    if (unlockOnFail)
                    {
                        try
                        {
                            _report.addDebug("The client did not respond after " + attempts + " connection attempts. Unlocking the job");
                            _sqlDao.unlockSite(job.SiteCode, job.VistaFile);
                        }
                        catch (Exception)
                        {
                            // this is probably ok - should get cleaned up eventually
                        }
                    }
                    return(false);
                }
            }

            // Reached only when attempts is zero or negative: nothing was tried.
            return(false);
        }
コード例 #20
0
        /// <summary>
        /// Reads or writes a run of bytes inside the first 16 bytes of <c>bytes</c>.
        /// </summary>
        /// <remarks>
        /// The requested length is clamped so that the run never extends past byte 15
        /// (and, on write, never past the end of the supplied array). The previous
        /// comparison was inverted: it enlarged short requests to the remaining size and
        /// left oversized requests unclamped, letting the pointer copy run past the
        /// 16-byte window.
        /// NOTE(review): 16 is presumably the size of <c>bytes</c> - confirm.
        /// </remarks>
        /// <param name="offset">Index of the first byte; valid range is 0..15.</param>
        /// <param name="length">Number of bytes requested.</param>
        /// <returns>
        /// On read: a new array with the (possibly shortened) run, or null when
        /// <paramref name="offset"/> or <paramref name="length"/> is out of range.
        /// </returns>
        public byte[] this[int offset, int length]
        {
            get
            {
                if (offset < 0 || offset >= 16 || length < 0)
                {
                    return(null);
                }

                // Never read beyond the 16-byte window.
                int available = 16 - offset;
                if (length > available)
                {
                    length = available;
                }

                byte[] r = new byte[length];
                if (length == 0)
                {
                    // Pinning an empty array yields a null pointer; nothing to copy anyway.
                    return(r);
                }

                fixed(byte *pbyte = bytes)
                fixed(byte *rbyte = r)
                {
                    Extractor.CopyBlock(rbyte, pbyte + offset, length);
                }
                return(r);
            }
            set
            {
                if (offset < 0 || offset >= 16 || length < 0)
                {
                    return;
                }

                // Never write beyond the 16-byte window or read beyond the source array.
                int available = 16 - offset;
                if (length > available)
                {
                    length = available;
                }
                if (value.Length < length)
                {
                    length = value.Length;
                }
                if (length == 0)
                {
                    return;
                }

                fixed(byte *rbyte = value)
                fixed(byte *pbyte = bytes)
                {
                    // NOTE(review): this four-argument overload presumably applies the offset
                    // to the destination - confirm against Extractor.CopyBlock.
                    Extractor.CopyBlock(pbyte, rbyte, offset, length);
                }
            }
        }
コード例 #21
0
        /// <summary>
        /// Opens <paramref name="item"/> and renders each of its pages onto an output
        /// canvas (device <c>IGR_DEVICE_IMAGE_TIF</c>) written to a <c>.tif</c> file in
        /// the output folder, named after <paramref name="filename"/>.
        /// </summary>
        /// <remarks>
        /// Exceptions raised while opening or rendering are written to <c>m_stderr</c>
        /// and not rethrown. The item is closed in all cases.
        /// </remarks>
        /// <param name="filename">Source file name; also determines the output file name.</param>
        /// <param name="item">Document to render.</param>
        private void ProcessFile(string filename, Extractor item)
        {
            string baseName    = System.IO.Path.GetFileNameWithoutExtension(filename);
            string destination = System.IO.Path.Combine(m_outputFolder, baseName + ".tif");

            m_stderr.WriteLine("Processing " + filename);
            try
            {
                item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

                Hyland.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(destination, isys_docfilters.IGR_DEVICE_IMAGE_TIF, "");
                try
                {
                    // The page count is re-read on every iteration, exactly as the loop condition requires.
                    int pageIndex = 0;
                    while (pageIndex < item.GetPageCount())
                    {
                        Hyland.DocumentFilters.Page page = item.GetPage(pageIndex);
                        try
                        {
                            canvas.RenderPage(page);
                        }
                        finally
                        {
                            page.Close();
                        }

                        pageIndex++;
                    }
                }
                finally
                {
                    canvas.Close();
                }
            }
            catch (Exception error)
            {
                m_stderr.WriteLine("Error Processing " + filename);
                m_stderr.WriteLine("   - " + error.ToString());
            }
            finally
            {
                item.Close();
            }
        }
コード例 #22
0
        /// <summary>
        /// Preprocesses <paramref name="query"/>, extracts candidates from it, parses
        /// each candidate relative to <paramref name="refTime"/> and converts the parse
        /// results into model results.
        /// </summary>
        /// <param name="query">Text to analyze.</param>
        /// <param name="refTime">Reference time handed to the parser.</param>
        /// <returns>One <c>ModelResult</c> per extracted candidate, in extraction order.</returns>
        public List <ModelResult> Parse(string query, System.DateTime refTime)
        {
            // preprocess the query
            query = FormatUtility.Preprocess(query);

            var parsed = new List <DateTimeParseResult>();
            foreach (var candidate in Extractor.Extract(query))
            {
                parsed.Add(Parser.Parse(candidate, refTime));
            }

            // Project every parse result onto the public model type.
            return(parsed.ConvertAll(o => new ModelResult
            {
                Start = o.Start.Value,
                // End is inclusive, hence the -1.
                End = o.Start.Value + o.Length.Value - 1,
                TypeName = o.Type,
                Resolution = o.Value as SortedDictionary <string, object>,
                Text = o.Text
            }));
        }
コード例 #23
0
        /// <summary>
        /// Parses the page: extracts the model's data from the page's selectable,
        /// passes every extracted item to each data handler and stores the item list on
        /// the page under the model identity.
        /// </summary>
        /// <param name="page">Page data.</param>
        protected override void Handle(Page page)
        {
            var extracted = Extractor.Extract(page.Selectable(), Model);

            if (extracted != null)
            {
                // Snapshot the sequence so that the handlers and the stored result see the same items.
                var items = extracted.ToList();

                foreach (var handler in _dataHandlers)
                {
                    for (var position = 0; position < items.Count; ++position)
                    {
                        dynamic data = items[position];
                        handler.Handle(ref data, page);
                    }
                }

                var resultItem = new Tuple <IModel, IList <dynamic> >(Model, items);
                page.AddResultItem(Model.Identity, resultItem);
            }
        }
コード例 #24
0
        /// <summary>
        /// Extracts metadata from five URLs one after another, then checks that every
        /// result is non-null and that the third one has keywords.
        /// </summary>
        public async Task TestExtractMethod()
        {
            var extractor = new Extractor();

            // Array element initializers are evaluated left to right, so the pages are
            // still requested sequentially, in this order.
            var metaData = new[]
            {
                await extractor.ExtractAsync(new Uri("http://andrew.gubskiy.com/")),
                await extractor.ExtractAsync(new Uri("http://torf.tv/")),
                await extractor.ExtractAsync(new Uri("http://torf.tv/video/IraSkladPortrait")),
                await extractor.ExtractAsync(new Uri("http://www.c-sharpcorner.com/news/stratis-bitcoin-full-node-for-net-core-in-c-sharp-goes-live")),
                await extractor.ExtractAsync(new Uri("http://www.aaronstannard.com/the-coming-dotnet-reinassance/"))
            };

            foreach (var entry in metaData)
            {
                Assert.NotNull(entry);
            }

            // Index 2 is the torf.tv video page.
            Assert.NotEmpty(metaData[2].Keywords);
        }
コード例 #25
0
        /// <summary>
        /// Entry point: runs the data transfer, the archive extraction, the PDF, JSON
        /// and Excel reports and the XML import, in that order.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Transfer from Mongo to MS SQL
            DataTransferer.TransferDataFromMongoToMsSql();

            // Extract from ZIP
            var archiveExtractor = new Extractor("..\\..\\");
            archiveExtractor.ExtractFromArchive("TravelInfo.zip");

            // PDF reporter
            PDFReporterGenerator.CreatePDF();

            // JSON reporter
            var jsonReporter = new Reporter();
            jsonReporter.MakeReports();

            // Excel reporter
            var excelReporter = new ExcelReporter();
            excelReporter.Report();

            // XML import
            var xmlInserter = new XMLDataInserter();
            xmlInserter.ParseXML();
        }
コード例 #26
0
        /// <summary>
        /// Requests the rule's URL, runs the rule's extraction expression over the
        /// response and returns the extraction result that has the most metas.
        /// </summary>
        /// <param name="rule">Rule to try out (URL, method, JS flag, expression, feature).</param>
        /// <param name="debug">When false, the result's content is cleared before returning.</param>
        /// <returns>
        /// The best extraction result, or an empty object when there is no response
        /// data or nothing was extracted.
        /// </returns>
        public object TestRule(RuleModel rule, [FromUri] bool debug = false)
        {
            var request = new Request(rule.Url);

            request.Method = rule.Method;
            request.RunJS  = (rule.RunJS == Status.ON);

            var response = Crawler.Request(request);

            if (response == null || response.Data == null)
            {
                return(new { });
            }

            var content = response.Data.ToString();
            var block   = RuiJiBlockParser.ParserBlock(rule.RuiJiExpression);
            var r       = new ExtractRequest();
            r.Content = content;

            r.Blocks = new List <ExtractFeatureBlock> {
                new ExtractFeatureBlock(block, rule.Feature)
            };

            var results = Extractor.Extract(r);
            if (results == null)
            {
                return(new { });
            }

            // Metas is treated as nullable further down, so rank a missing Metas as zero
            // instead of throwing inside the sort.
            var result = results.OrderByDescending(m => m.Metas != null ? m.Metas.Count : 0).FirstOrDefault();

            // FirstOrDefault yields null when nothing was extracted; previously that null
            // was dereferenced on the next line.
            if (result == null)
            {
                return(new { });
            }

            if (result.Paging != null && result.Paging.Count > 0 && result.Metas != null && result.Metas.ContainsKey("content"))
            {
                result = PagingExtractor.MergeContent(new Uri(rule.Url), result, block);
            }

            if (!debug)
            {
                CrawlTaskFunc.ClearContent(result);
            }

            return(result);
        }
コード例 #27
0
ファイル: GUIDModel.cs プロジェクト: mrussek/Recognizers-Text
        /// <summary>
        /// Preprocesses <paramref name="query"/>, extracts and parses its candidates and
        /// converts the parse results into model results. Exceptions thrown during
        /// extraction or parsing are swallowed; results parsed before the failure are kept.
        /// </summary>
        /// <param name="query">Text to analyze.</param>
        /// <returns>One <c>ModelResult</c> per successfully parsed candidate.</returns>
        public override List <ModelResult> Parse(string query)
        {
            var parsedSequences = new List <ParseResult>();

            // Preprocess the query
            query = QueryProcessor.Preprocess(query);

            try
            {
                foreach (var candidate in Extractor.Extract(query))
                {
                    var parsed = Parser.Parse(candidate);
                    parsedSequences.Add(parsed);
                }
            }
            catch (Exception)
            {
                // Nothing to do. Exceptions in parse should not break users of recognizers.
                // No result.
            }

            var models = new List <ModelResult>();
            foreach (var o in parsedSequences)
            {
                var start = o.Start.Value;

                // End is inclusive, hence the -1.
                var end = o.Start.Value + o.Length.Value - 1;

                var resolution = new SortedDictionary <string, object>
                {
                    { ResolutionKey.Value, o.ResolutionStr },
                    { ResolutionKey.Score, o.Value.ToString() },
                };

                models.Add(new ModelResult
                {
                    Start = start,
                    End = end,
                    Resolution = resolution,
                    Text = o.Text,
                    TypeName = ModelTypeName,
                });
            }

            return(models);
        }
コード例 #28
0
        /// <summary>
        ///     Extracts a BZip2 file contained in fileEntry.
        /// </summary>
        /// <param name="fileEntry"> FileEntry to extract </param>
        /// <param name="options"> Extraction options; <c>ExtractSelfOnFail</c> decides whether the entry itself is returned when the stream cannot be opened </param>
        /// <param name="governor"> Resource governor consulted with the stream length </param>
        /// <returns> Extracted files </returns>
        /// <exception cref="OverflowException"> The decompressed entry was detected as a quine </exception>
        public async IAsyncEnumerable <FileEntry> ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor)
        {
            BZip2Stream?bzip2Stream = null;

            try
            {
                bzip2Stream = new BZip2Stream(fileEntry.Content, SharpCompress.Compressors.CompressionMode.Decompress, false);

                // NOTE(review): an exception thrown here is logged below but the stream
                // stays assigned, so extraction still proceeds - confirm this is intended.
                governor.CheckResourceGovernor(bzip2Stream.Length);
            }
            catch (Exception e)
            {
                Logger.Debug(Extractor.DEBUG_STRING, ArchiveFileType.BZIP2, fileEntry.FullPath, string.Empty, e.GetType());
            }

            if (bzip2Stream == null)
            {
                if (options.ExtractSelfOnFail)
                {
                    yield return(fileEntry);
                }
                yield break;
            }

            // try/finally guarantees the stream is released when FromStreamAsync or the
            // nested extraction throws, and when the consumer stops enumerating early;
            // previously it was disposed only on the quine path and on normal completion.
            try
            {
                var newFilename  = Path.GetFileNameWithoutExtension(fileEntry.Name);
                var newFileEntry = await FileEntry.FromStreamAsync(newFilename, bzip2Stream, fileEntry);

                if (Extractor.IsQuine(newFileEntry))
                {
                    Logger.Info(Extractor.IS_QUINE_STRING, fileEntry.Name, fileEntry.FullPath);
                    throw new OverflowException();
                }

                await foreach (var extractedFile in Context.ExtractAsync(newFileEntry, options, governor))
                {
                    yield return(extractedFile);
                }
            }
            finally
            {
                bzip2Stream.Dispose();
            }
        }
コード例 #29
0
    /// <summary>
    /// Extracts the item at the belt position served by the given inserter.
    /// Only decrements this inventory, does not add to the inserter's inventory.
    /// Returns the item if successful, else null.
    /// </summary>
    /// <param name="item">Not read by this method.</param>
    /// <param name="inserter">Inserter whose output side and grid position select the belt slot.</param>
    /// <returns>The info of the removed item, or null when the slot is empty.</returns>
    public GenericItem ExtractItem(GenericItem item, Extractor inserter)
    {
        //Side the inserter is using
        InsertSide insert_side = inserter.OUTPUT_SIDE;
        InsertDir  insert_dir  = GetInsertDir(inserter.GetComponent <GridController>());
        BeltSide   side        = GetBeltSide(insert_dir, insert_side);
        BeltPos    pos         = GetBeltPos(insert_dir, insert_side);

        //Get the relevant list.
        List <ItemController> LIST = GetSideList(side);

        //Get the relevant index.
        // NOTE(review): index is computed but the slot is addressed with (int)pos below -
        // confirm which of the two is the intended list position.
        int index = GetIndex(side, pos);

        int            slot     = (int)pos;
        ItemController occupant = LIST[slot];

        // An empty slot means there is nothing to extract; honor the documented
        // "else null" contract instead of throwing a NullReferenceException.
        if (occupant == null)
        {
            return(null);
        }

        //Save the item to be returned
        GenericItem item_ret = occupant.INFO;

        //Update side list.
        occupant.DespawnObject();
        LIST[slot] = null;
        return(item_ret);
    }
コード例 #30
0
        /// <summary>
        /// Validates the course url and token, extracts the course and starts its download.
        /// On an invalid url the flow restarts through RunWithConfig; on an invalid token or
        /// an extraction error it restarts through RunWithoutConfig.
        /// </summary>
        /// <param name="config">Supplies quality, authentication token and course directory.</param>
        /// <param name="courseUrl">Url of the course to extract.</param>
        private static async Task RunExtractorAndDownloader(Config config, string courseUrl)
        {
            Console.WriteLine(TUI.CONTINUEGLYPH + "Extracting Course Data. This might take some time...");
            var extractor = new Extractor(courseUrl, config.Quality, config.AuthenticationToken);

            bool urlIsValid = extractor.HasValidUrl();
            if (!urlIsValid)
            {
                TUI.ShowError("The course url you provided is not a recognized valid Linkedin Learning link");
                await RunWithConfig(config);
                return;
            }

            bool tokenIsValid = await extractor.HasValidToken();
            if (!tokenIsValid)
            {
                TUI.ShowError("The token you provided is not valid");
                await RunWithoutConfig();
                return;
            }

            Course course;
            try
            {
                // The progress bar only lives for the duration of the extraction.
                using (var progressBar = new ProgressBar(10000, "Extracting Course Links - This will take some time", optionPbarExtractor))
                {
                    course = await extractor.GetCourse(progressBar.AsProgress <float>());
                }
            }
            catch (Exception extractionError)
            {
                TUI.ShowError(extractionError.Message);
                Log.Error(extractionError, extractionError.Message);
                await RunWithoutConfig();
                return;
            }

            Console.WriteLine(TUI.ENDGLYPH + "Course Extracted Successfully");
            Log.Information("Course Extracted. Downloading...");
            Console.WriteLine();
            CourseDownloader.DownloadCourse(course, config.CourseDirectory);
        }
コード例 #31
0
        /// <summary>
        /// Extracts and parses the spec input, then compares type, text, timex and the
        /// future/past resolutions of every result against the expected results of the spec.
        /// </summary>
        public void TestDateTimeParser()
        {
            string inconclusiveReason;
            bool   markInconclusive = TestUtils.EvaluateSpec(TestSpec, out inconclusiveReason);

            if (markInconclusive)
            {
                Assert.Inconclusive(inconclusiveReason, GetMessage(TestSpec));
            }

            // Stop in the debugger for specs flagged for debugging.
            if (Debugger.IsAttached && TestSpec.Debug)
            {
                Debugger.Break();
            }

            var referenceDateTime = TestSpec.GetReferenceDateTime();

            var actualResults = Extractor.Extract(TestSpec.Input)
                                .Select(extracted => DateTimeParser.Parse(extracted, referenceDateTime))
                                .ToArray();
            var expectedResults = TestSpec.CastResults <DateTimeParseResult>();

            Assert.AreEqual(expectedResults.Count(), actualResults.Length, GetMessage(TestSpec));

            // Walk expected and actual results side by side.
            var pairs = expectedResults.Zip(actualResults, (expected, actual) => new { Expected = expected, Actual = actual });

            foreach (var pair in pairs)
            {
                Assert.AreEqual(pair.Expected.Type, pair.Actual.Type, GetMessage(TestSpec));
                Assert.AreEqual(pair.Expected.Text, pair.Actual.Text, GetMessage(TestSpec));

                var actualResolution   = pair.Actual.Value as DateTimeResolutionResult;
                var expectedResolution = JsonConvert.DeserializeObject <DateTimeResolutionResult>(pair.Expected.Value.ToString());

                Assert.IsNotNull(actualResolution, GetMessage(TestSpec));
                Assert.AreEqual(expectedResolution.Timex, actualResolution.Timex, GetMessage(TestSpec));
                CollectionAssert.AreEqual(expectedResolution.FutureResolution, actualResolution.FutureResolution, GetMessage(TestSpec));
                CollectionAssert.AreEqual(expectedResolution.PastResolution, actualResolution.PastResolution, GetMessage(TestSpec));
            }
        }
コード例 #32
0
ファイル: Form1.cs プロジェクト: alex-ded-sd/pathextractor
 /// <summary>
 /// Lets the user pick a folder, collects its *.mp3 and *.jpg files and transliterates
 /// their names, reporting the outcome in a message box.
 /// </summary>
 private void button2_Click(object sender, EventArgs e)
 {
     // Any dialog result other than OK is reported as an error.
     if (folderBrowserDialog2.ShowDialog() != DialogResult.OK)
     {
         MessageBox.Show("Что то пошло не так", "Упс", MessageBoxButtons.OK, MessageBoxIcon.Error);
         return;
     }

     try
     {
         string[] searchPatterns = new[] { "*.mp3", "*.jpg" };
         IEnumerable <FileInfo> selectedFiles = Extractor.GetFiles(folderBrowserDialog2.SelectedPath, searchPatterns);

         Translit.TranslitFileNames(selectedFiles);
         MessageBox.Show("Слава Богу, ошибок нет! Всё сработало", "",
                         MessageBoxButtons.OK, MessageBoxIcon.Information);
     }
     catch
     {
         // Every failure while collecting or renaming is reported with the same generic message.
         MessageBox.Show("Ошибка в ядре", "Упс", MessageBoxButtons.OK, MessageBoxIcon.Error);
     }
 }
コード例 #33
0
        /// <summary>
        /// Reads or writes the underlying byte buffer starting at <paramref name="offset"/>.
        /// The code treats the buffer as 8 bytes wide (see the literal 8 and the ulong copy).
        /// </summary>
        /// <param name="offset">Byte position inside the buffer.</param>
        /// <value>
        /// get: for 0 &lt; offset &lt; 8 a new array holding the bytes from offset to the end;
        /// otherwise the result of GetBytes().
        /// set: copies the supplied bytes into the buffer at offset, never more than 8 - offset bytes.
        /// </value>
        public byte[] this[int offset]
        {
            get
            {
                if (offset > 0 && offset < 8)
                {
                    int    l = (8 - offset);
                    byte[] r = new byte[l];

                    fixed(byte *pbyte = bytes)
                    fixed(byte *rbyte = r)
                    {
                        Extractor.CopyBlock(rbyte, pbyte + offset, l);
                    }

                    return(r);
                }
                return(GetBytes());
            }
            set
            {
                int l = value.Length;
                if (offset > 0 || l < 8)
                {
                    // Clamp the copy to the space left after offset. Previously this clamped
                    // value was computed but the raw length was passed on, so a long input
                    // written at a positive offset ran past the end of the buffer.
                    int count = 8 - offset;
                    if (l < count)
                    {
                        count = l;
                    }

                    // Nothing fits when offset is at or beyond the end of the buffer.
                    if (count > 0)
                    {
                        fixed(byte *pbyte = bytes)
                        fixed(byte *rbyte = value)
                        {
                            // NOTE(review): assumes CopyBlock(dest, src, destOffset, count) - confirm.
                            Extractor.CopyBlock(pbyte, rbyte, offset, count);
                        }
                    }
                }
                else
                {
                    // offset <= 0 and at least 8 input bytes: overwrite the buffer in one 8-byte store.
                    fixed(byte *v = value)
                    fixed(byte *b = bytes)
                    {
                        *(ulong *)b = *(ulong *)v;
                    }
                }
            }
        }
コード例 #34
0
        /// <summary>
        /// Checks the interface and implementation generated for StructWithConstructors,
        /// including a parameterless constructor on the implementation.
        /// </summary>
        public void ExtractMembers_StructWithConstructors_ExtractsConstructorIncludingParameterless()
        {
            const string interfaceName = "Custom.Namespace.IMyStruct";
            const string implementationName = "Custom.Namespace.Impl";
            IExtractor extractor = new Extractor(typeof(StructWithConstructors));

            var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
            var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
            write_output(generatedInterface, generatedImplementation);

            // Whitespace is stripped from both sides, so only the token sequence is compared.
            var expectedInterface = @"
            namespace Custom.Namespace
            {
            public interface IMyStruct
            {
            }
            }";
            Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

            var expectedImplementation = @"
            namespace Custom.Namespace
            {
            public class Impl : Custom.Namespace.IMyStruct
            {
            private readonly Core.Fixtures.Fakes.StructWithConstructors instance;

            public Impl(System.Int32 value)
            {
            instance = new Core.Fixtures.Fakes.StructWithConstructors(value);
            }

            public Impl(System.Int32 value1, System.Int32 value2)
            {
            instance = new Core.Fixtures.Fakes.StructWithConstructors(value1, value2);
            }

            public override System.Boolean Equals(System.Object obj)
            {
            return instance.Equals(obj);
            }
            public override System.Int32 GetHashCode()
            {
            return instance.GetHashCode();
            }
            public override System.String ToString()
            {
            return instance.ToString();
            }

            public Impl()
            {
            instance = new Core.Fixtures.Fakes.StructWithConstructors();
            }
            }
            }";
            Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
        }
コード例 #35
0
        /// <summary>
        /// Writes the generated instance and static interface/implementation code for
        /// DateTime to the console; nothing is asserted.
        /// </summary>
        public void ExtractMembers_TypeOfDateTime_JustWriteItToConsole()
        {
            const string instanceInterfaceName = "Custom.Namespace.IDateTime";
            const string instanceImplementationName = "Custom.Namespace.DateTimeImplementation";
            const string staticInterfaceName = "Custom.Namespace.IDateTimeStatics";
            const string staticImplementationName = "Custom.Namespace.DateTimeStaticsImplementation";

            IExtractor extractor = new Extractor(typeof (DateTime));

            // Instance members first ...
            Console.WriteLine(extractor.ExtractInterfaceForInstanceMembers(instanceInterfaceName));
            Console.WriteLine(extractor.ExtractImplementationForInstanceMembers(instanceInterfaceName, instanceImplementationName));

            // ... then the static members.
            Console.WriteLine(extractor.ExtractInterfaceForStaticMembers(staticInterfaceName));
            Console.WriteLine(extractor.ExtractImplementationForStaticMembers(staticInterfaceName, staticImplementationName));

            Console.WriteLine();
        }
コード例 #36
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
        /// <summary>
        /// Appends a link whose text is preceded by a symbol. The symbol is either placed
        /// inside the link or appended in front of it, depending on UsernameIncludeSymbol
        /// and on whether the symbol matches Regex.AT_SIGNS.
        /// </summary>
        /// <param name="entity">Entity the link is generated for; forwarded to LinkToText.</param>
        /// <param name="symbol">Symbol that precedes the text; wrapped in SymbolTag when that is set.</param>
        /// <param name="text">Link text; it is HTML-escaped here and wrapped in TextWithSymbolTag when that is set.</param>
        /// <param name="attributes">Attributes for the anchor element; forwarded to LinkToText.</param>
        /// <param name="builder">Builder that receives the generated markup.</param>
        public void LinkToTextWithSymbol(Extractor.Entity entity, string symbol, string text, IDictionary<string, string> attributes, StringBuilder builder)
        {
            string taggedSymbol;
            if (string.IsNullOrWhiteSpace(SymbolTag))
            {
                taggedSymbol = symbol;
            }
            else
            {
                taggedSymbol = string.Format("<{0}>{1}</{0}>", SymbolTag, symbol);
            }

            text = EscapeHTML(text);

            string taggedText;
            if (string.IsNullOrWhiteSpace(TextWithSymbolTag))
            {
                taggedText = text;
            }
            else
            {
                taggedText = string.Format("<{0}>{1}</{0}>", TextWithSymbolTag, text);
            }

            // The symbol stays outside the link only when it is an at-sign and
            // UsernameIncludeSymbol is off.
            bool symbolOutsideLink = !UsernameIncludeSymbol && Regex.AT_SIGNS.IsMatch(symbol);

            if (symbolOutsideLink)
            {
                builder.Append(taggedSymbol);
                LinkToText(entity, taggedText, attributes, builder);
                return;
            }

            LinkToText(entity, taggedSymbol.ToString() + taggedText, attributes, builder);
        }
コード例 #37
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
 /// <summary>
 /// Creates an <see cref="Autolink"/> configured with the default CSS classes, url bases
 /// and invisible-tag attributes, with nofollow switched on.
 /// </summary>
 /// <param name="linkURLWithoutProtocol">Passed to the extractor's ExtractURLWithoutProtocol setting.</param>
 public Autolink(bool linkURLWithoutProtocol)
 {
     // CSS classes (urls get none by default).
     UrlClass = null;
     ListClass = DEFAULT_LIST_CLASS;
     UsernameClass = DEFAULT_USERNAME_CLASS;
     HashtagClass = DEFAULT_HASHTAG_CLASS;
     CashtagClass = DEFAULT_CASHTAG_CLASS;

     // Url prefixes used when building hrefs.
     UsernameUrlBase = DEFAULT_USERNAME_URL_BASE;
     ListUrlBase = DEFAULT_LIST_URL_BASE;
     HashtagUrlBase = DEFAULT_HASHTAG_URL_BASE;
     CashtagUrlBase = DEFAULT_CASHTAG_URL_BASE;

     InvisibleTagAttrs = DEFAULT_INVISIBLE_TAG_ATTRS;
     NoFollow = true;

     var extractor = new Extractor();
     extractor.ExtractURLWithoutProtocol = linkURLWithoutProtocol;
     __Extractor = extractor;
 }
コード例 #38
0
ファイル: ExtractTests.cs プロジェクト: CRuppert/NTwitterText
 /// <summary>
 /// Verifies that the number of urls extracted from the test string equals the number of expected tags.
 /// </summary>
 /// <param name="testCase">Test string, expected tags and description of the case.</param>
 public void ExtractsUrlsCorrectly(SimpleTwitterCase testCase)
 {
     var extractedUrls = new Extractor().ExtractURLs(testCase.TestString);

     Assert.AreEqual(testCase.ExpectedTags.Count(), extractedUrls.Count, testCase.Description);
 }
コード例 #39
0
 /// <summary>
 /// Runs a fresh Extractor over the catalog item and returns its result.
 /// </summary>
 /// <param name="catalogItem">Catalog item handed to the extractor.</param>
 /// <returns>The dictionary produced by the extractor.</returns>
 public static IDictionary<string, string> MakePackageMetadata(JObject catalogItem)
 {
     return new Extractor().Extract(catalogItem);
 }
コード例 #40
0
ファイル: ExtractTests.cs プロジェクト: CRuppert/NTwitterText
 /// <summary>
 /// Verifies that the hashtags extracted from the test string match the expected tags
 /// in number and that each extracted tag is among the expected ones.
 /// </summary>
 /// <param name="testCase">Test string, expected tags and description of the case.</param>
 public void ExtractsHashtagsCorrectly(SimpleTwitterCase testCase)
 {
     var extractedTags = new Extractor().ExtractHashtags(testCase.TestString);

     Assert.AreEqual(testCase.ExpectedTags.Count(), extractedTags.Count, testCase.Description);

     foreach (var extractedTag in extractedTags)
     {
         Assert.Contains(extractedTag, testCase.ExpectedTags, extractedTag + " Not Found");
     }
 }
コード例 #41
0
 /// <summary>
 /// Runs a fresh Extractor over the package and returns its result.
 /// </summary>
 /// <param name="package">Package handed to the extractor.</param>
 /// <returns>The dictionary produced by the extractor.</returns>
 public static IDictionary<string, string> MakePackageMetadata(Package package)
 {
     return new Extractor().Extract(package);
 }
コード例 #42
0
        /// <summary>
        /// Checks the interface and implementation generated for the instance methods of
        /// ClassWithMethods (plain, multi-parameter, out and ref parameters).
        /// </summary>
        public void ExtractInstanceMembers_ClassWithMethods()
        {
            const string interfaceName = "Custom.Namespace.IMyClass";
            const string implementationName = "Custom.Namespace.SMyClass";
            IExtractor extractor = new Extractor(typeof(ClassWithMethods));

            var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
            var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
            write_output(generatedInterface, generatedImplementation);

            // Whitespace is stripped from both sides, so only the token sequence is compared.
            var expectedInterface = @"
            namespace Custom.Namespace
            {
            public interface IMyClass
            {
            void MultipleParameters(System.String value1, System.Int32 value2);
            void OutParameter(out System.String value);
            void RefParameter(ref System.String value);
            System.Int32 Simple();
            }
            }";
            Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

            var expectedImplementation = @"
            namespace Custom.Namespace
            {
            public class SMyClass : Custom.Namespace.IMyClass
            {
            private readonly Core.Fixtures.Fakes.ClassWithMethods instance;

            public SMyClass()
            {
            instance = new Core.Fixtures.Fakes.ClassWithMethods();
            }

            public void MultipleParameters(System.String value1, System.Int32 value2)
            {
            instance.MultipleParameters(value1, value2);
            }
            public void OutParameter(out System.String value)
            {
            instance.OutParameter(out value);
            }
            public void RefParameter(ref System.String value)
            {
            instance.RefParameter(ref value);
            }
            public System.Int32 Simple()
            {
            return instance.Simple();
            }

            public override System.Boolean Equals(System.Object obj)
            {
            return instance.Equals(obj);
            }
            public override System.Int32 GetHashCode()
            {
            return instance.GetHashCode();
            }
            public override System.String ToString()
            {
            return instance.ToString();
            }
            }
            }";
            Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
        }
コード例 #43
0
ファイル: Crawler.cs プロジェクト: repne/happyface
        /// <summary>
        /// Builds the crawler pipeline: creates every stage with its options and wires the
        /// stages together via SendTo.
        /// </summary>
        /// <param name="documentFactory">Factory handed to the parser.</param>
        /// <param name="store">Result store used by the storer and the provider.</param>
        /// <param name="frontier">Store of fetch targets used by the provider.</param>
        protected Crawler(IDocumentFactory documentFactory, IKeyValueStore<string, Result> store, IKeyValueStore<string, FetchTarget> frontier)
        {
            _store = store;
            _frontier = frontier;

            // Options for each stage; only the fetcher carries a non-default setting.
            // Storer and Dispatcher are created without an options object.
            var fetcherOptions = new FetcherOptions();
            fetcherOptions.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36";
            var parserOptions = new ParserOptions();
            var scraperOptions = new ScraperOptions();
            var extractorOptions = new ExtractorOptions();
            var builderOptions = new BuilderOptions();
            var providerOptions = new ProviderOptions();

            // Stage construction.
            Fetcher = new Fetcher(fetcherOptions);
            Parser = new Parser(parserOptions, documentFactory);
            Scraper = new Scraper(scraperOptions);
            Extractor = new Extractor(extractorOptions);
            Storer = new Storer(store);
            Builder = new Builder(builderOptions);
            Provider = new Provider(providerOptions, store, frontier);
            Dispatcher = new Dispatcher();

            // Only responses with status OK are forwarded to the parser.
            Fetcher.SendTo(Parser, response => response.StatusCode == System.Net.HttpStatusCode.OK);

            // Parsed documents go to both the scraper and the extractor.
            Parser.SendTo(Scraper);
            Parser.SendTo(Extractor);

            // The builder receives OK responses plus the scraper and extractor outputs.
            Fetcher.SendTo(Builder, response => response.StatusCode == System.Net.HttpStatusCode.OK);
            Scraper.SendTo(Builder);
            Extractor.SendTo(Builder);

            // Built results are stored and also passed to the provider.
            Builder.SendTo(Storer);
            Builder.SendTo(Provider);

            // Non-null provider output is dispatched and fed back into the fetcher.
            Provider.SendTo(Dispatcher, target => target != null);
            Dispatcher.SendTo(Fetcher);
        }
コード例 #44
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
        /// <summary>
        /// Appends a link for a cashtag entity, using "$" as the symbol in front of the tag.
        /// </summary>
        /// <param name="entity">Cashtag entity; its Value is used as the tag.</param>
        /// <param name="text">Not read by this method.</param>
        /// <param name="builder">Builder that receives the generated markup.</param>
        public void LinkToCashtag(Extractor.Entity entity, string text, StringBuilder builder)
        {
            string tag = entity.Value;

            // Entries are added in the order href, title, class.
            IDictionary<string, string> linkAttributes = new Dictionary<string, string>
            {
                { "href", CashtagUrlBase + tag },
                { "title", "$" + tag },
                { "class", CashtagClass }
            };

            LinkToTextWithSymbol(entity, "$", tag, linkAttributes, builder);
        }
コード例 #45
0
        /// <summary>
        /// Checks that a method returning its own source type is generated with the created
        /// interface as return type, in both the interface and the implementation.
        /// </summary>
        public void ExtractInstanceMembers_SourceType_SubstituteCreatedType()
        {
            const string interfaceName = "Custom.Namespace.IClassThatHasAReturnTypeOfSelf";
            const string implementationName = "Custom.Namespace.ClassWithMethodsImplementation";
            IExtractor extractor = new Extractor(typeof(ClassThatHasAReturnTypeOfSelf));

            var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
            var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
            write_output(generatedInterface, generatedImplementation);

            // Whitespace is stripped from both sides, so only the token sequence is compared.
            var expectedInterface = @"
            namespace Custom.Namespace
            {
            public interface IClassThatHasAReturnTypeOfSelf
            {
            Custom.Namespace.IClassThatHasAReturnTypeOfSelf ReturnSameType();
            }
            }";
            Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

            var expectedImplementation = @"
            namespace Custom.Namespace
            {
            public class ClassWithMethodsImplementation : Custom.Namespace.IClassThatHasAReturnTypeOfSelf
            {
            private readonly Core.Fixtures.Fakes.ClassThatHasAReturnTypeOfSelf instance;

            public ClassWithMethodsImplementation()
            {
            instance = new Core.Fixtures.Fakes.ClassThatHasAReturnTypeOfSelf();
            }

            public ClassWithMethodsImplementation(Core.Fixtures.Fakes.ClassThatHasAReturnTypeOfSelf instance)
            {
            this.instance = instance;
            }

            public Custom.Namespace.IClassThatHasAReturnTypeOfSelf ReturnSameType()
            {
            return new ClassWithMethodsImplementation(instance);
            }

            public override System.Boolean Equals(System.Object obj)
            {
            return instance.Equals(obj);
            }
            public override System.Int32 GetHashCode()
            {
            return instance.GetHashCode();
            }
            public override System.String ToString()
            {
            return instance.ToString();
            }
            }
            }";
            Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
        }
コード例 #46
0
ファイル: ExtractTests.cs プロジェクト: CRuppert/NTwitterText
 /// <summary>
 /// Placeholder test: extracts hashtags from the test string and then fails with
 /// "NotImplemented" as soon as the case has at least one expectation.
 /// </summary>
 /// <param name="testCase">Test string and the expectations of the case.</param>
 public void PassesTwitterIndexesForHashTags(IndexedTwitterTest testCase)
 {
     Extractor hashtagExtractor = new Extractor();
     var extractedTags = hashtagExtractor.ExtractHashtags(testCase.TestString);

     // Index comparison is not written yet; the extracted tags are not inspected.
     foreach (var expectation in testCase.Expectations)
     {
         Assert.Fail("NotImplemented");
     }
 }
コード例 #47
0
        /// <summary>
        /// Checks that for SimpleClass the generated interface is empty and the generated
        /// implementation still overrides Equals, GetHashCode and ToString.
        /// </summary>
        public void ExtractInstanceMembers_SimpleClassWithNoImplementation_OverrideMembersFromObject()
        {
            const string interfaceName = "Custom.Namespace.IMyClass";
            const string implementationName = "Custom.Namespace.SMyClass";
            IExtractor extractor = new Extractor(typeof(SimpleClass));

            var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
            var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
            write_output(generatedInterface, generatedImplementation);

            // Whitespace is stripped from both sides, so only the token sequence is compared.
            var expectedInterface = @"
            namespace Custom.Namespace
            {
            public interface IMyClass
            {
            }
            }";
            Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

            var expectedImplementation = @"
            namespace Custom.Namespace
            {
            public class SMyClass : Custom.Namespace.IMyClass
            {
            private readonly Core.Fixtures.Fakes.SimpleClass instance;

            public SMyClass()
            {
            instance = new Core.Fixtures.Fakes.SimpleClass();
            }

            public override System.Boolean Equals(System.Object obj)
            {
            return instance.Equals(obj);
            }
            public override System.Int32 GetHashCode()
            {
            return instance.GetHashCode();
            }
            public override System.String ToString()
            {
            return instance.ToString();
            }
            }
            }";
            Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
        }
コード例 #48
0
        /// <summary>
        /// Checks the interface and implementation generated for the static members of
        /// StaticClass; the expected implementation has no instance field or constructor.
        /// </summary>
        public void ExtractStaticMembers_StaticClass_DoNotExtractAnyInstanceMembers()
        {
            const string interfaceName = "Custom.Namespace.IStaticMyClass";
            const string implementationName = "Custom.Namespace.SStaticMyClass";
            IExtractor extractor = new Extractor(typeof(StaticClass));

            var generatedInterface = extractor.ExtractInterfaceForStaticMembers(interfaceName);
            var generatedImplementation = extractor.ExtractImplementationForStaticMembers(interfaceName, implementationName);
            write_output(generatedInterface, generatedImplementation);

            // Whitespace is stripped from both sides, so only the token sequence is compared.
            var expectedInterface = @"
            namespace Custom.Namespace
            {
            public interface IStaticMyClass
            {
            void DoSomething();
            }
            }";
            Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

            var expectedImplementation = @"
            namespace Custom.Namespace
            {
            public class SStaticMyClass : Custom.Namespace.IStaticMyClass
            {
            public void DoSomething()
            {
            Core.Fixtures.Fakes.StaticClass.DoSomething();
            }
            }
            }";
            Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
        }
コード例 #49
0
ファイル: Extractor.cs プロジェクト: nerochiaro/webcontent-cs
 /// <summary>
 /// Runs a fresh Extractor against this instance with the given arguments.
 /// </summary>
 /// <param name="args">Arguments forwarded to the extractor.</param>
 /// <returns>The value returned by the extractor.</returns>
 public bool Extract(StringDict args)
 {
     return new Extractor().Extract(this, args);
 }
コード例 #50
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
 /// <summary>
 /// Appends an anchor element with the given attributes and text to the builder.
 /// Attribute names and values are HTML-escaped; the link text is appended as given.
 /// </summary>
 /// <param name="entity">Entity the link is generated for; passed to the optional modifiers.</param>
 /// <param name="text">Link text; may be replaced by LinkTextModifier.</param>
 /// <param name="attributes">Anchor attributes; "rel" is set when NoFollow is on and LinkAttributeModifier may change them.</param>
 /// <param name="builder">Builder that receives the generated markup.</param>
 public void LinkToText(Extractor.Entity entity, string text, IDictionary<string, string> attributes, StringBuilder builder)
 {
     if (NoFollow)
     {
         attributes["rel"] = "nofollow";
     }

     // Optional hooks that may adjust the attributes and the text.
     if (LinkAttributeModifier != null)
     {
         LinkAttributeModifier.Modify(entity, attributes);
     }
     if (LinkTextModifier != null)
     {
         text = LinkTextModifier.Modify(entity, text);
     }

     // Opening tag with one name="value" pair per attribute.
     builder.Append("<a");
     foreach (KeyValuePair<string, string> attribute in attributes)
     {
         builder.Append(" ");
         builder.Append(EscapeHTML(attribute.Key));
         builder.Append("=\"");
         builder.Append(EscapeHTML(attribute.Value));
         builder.Append("\"");
     }
     builder.Append(">");

     // Link text and closing tag.
     builder.Append(text);
     builder.Append("</a>");
 }
コード例 #51
0
 /// <summary>
 /// Runs every case of the "json" test section: extracts the entities, gives url
 /// entities a fixed display/expanded url, auto-links the text and compares it with
 /// the expected output. All failing cases are collected and reported together.
 /// </summary>
 public void AutolinkJsonTest()
 {
     Extractor entityExtractor = new Extractor();
     List<string> failedCases = new List<string>();

     foreach (dynamic test in LoadTestSection<string>("json"))
     {
         string actual = string.Empty;
         try
         {
             List<Extractor.Entity> entities = entityExtractor.ExtractEntitiesWithIndices(test.text);

             // Url entities get fixed display and expanded urls before linking.
             foreach (Extractor.Entity entity in entities)
             {
                 if (entity.Type != Extractor.EntityType.Url)
                 {
                     continue;
                 }
                 entity.DisplayURL = "twitter.com";
                 entity.ExpandedURL = "http://twitter.com/";
             }

             actual = autolink.AutoLinkEntities(test.text, entities);
             Assert.AreEqual(test.expected, actual);
         }
         catch (Exception)
         {
             // Any exception in the case, including a failed assertion, is recorded
             // so the remaining cases still run.
             failedCases.Add(string.Format("\n{0}: {1}\n\tExpected: {2}\n\t  Actual: {3}", test.description, test.text, test.expected, actual));
         }
     }

     if (failedCases.Count > 0)
     {
         Assert.Fail(string.Join("\n", failedCases));
     }
 }
コード例 #52
0
ファイル: ExtractTests.cs プロジェクト: CRuppert/NTwitterText
        /// <summary>
        /// Verifies the reply screen name extracted from the test string: when nothing is
        /// extracted the first expected tag must be empty, otherwise it must equal the result.
        /// </summary>
        /// <param name="testCase">Test string, expected tags and description of the case.</param>
        public void ExtractsRepliesCorrectly(SimpleTwitterCase testCase)
        {
            var screenName = new Extractor().ExtractReplyScreenname(testCase.TestString);

            if (screenName != null)
            {
                Assert.AreEqual(testCase.ExpectedTags.First(), screenName, testCase.Description);
                return;
            }

            Assert.IsEmpty(testCase.ExpectedTags.First(), testCase.Description);
        }
コード例 #53
0
ファイル: RankerTest.cs プロジェクト: eumagnun/iwebcrawler
        /// <summary>
        /// Walks a fixed list of urls plus the task's seed list through the whole pipeline:
        /// fetch, link extraction, filtering and ranking into the frontier, classification,
        /// and deployment of the results to storage. Time stamps are taken between the
        /// phases but are not asserted or reported.
        /// </summary>
        public void Test2()
        {
            List<String> targets = new List<string>
            {
                "http://www.autonews.com/",
                "http://www.geonius.com/www/",
                "http://en.wikipedia.org/wiki/Main_Page",
                "http://www.computerworld.com/"
            };
            List<string> seedUrls = StorageSystem.StorageSystem.getInstance().getSeedList(taskId);
            foreach (string seedUrl in seedUrls)
            {
                targets.Add(seedUrl);
            }

            List<Category> taskCategories = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
            Constraints taskConstraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);

            // The seed list is requested a second time and the result is discarded.
            StorageSystem.StorageSystem.getInstance().getSeedList(taskId);

            Filter linkFilter = new Filter("http://", taskConstraints);
            Categorizer categorizer = new Categorizer(taskCategories);
            Ranker ranker = new Ranker(categorizer);
            Extractor linkExtractor = new Extractor();
            HttpResourceFetcher fetcher = new HttpResourceFetcher();

            foreach (String target in targets)
            {
                DateTime startedAt = DateTime.Now;

                ResourceContent resource = fetcher.canFetch(target)
                    ? fetcher.fetch(target, 10000, 100)
                    : null;

                DateTime fetchedAt = DateTime.Now;

                // Nothing to process when the fetch produced no content.
                if (resource == null || resource.getResourceContent() == null)
                {
                    continue;
                }

                /*** 0. fetching the link from the internet ***/
                TimeSpan fetchDuration = fetchedAt - startedAt;

                // Extract all the links in the page.
                List<LinkItem> pageLinks = linkExtractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
                RuntimeStatistics.addToExtractedUrls(pageLinks.Count);

                DateTime extractedAt = DateTime.Now;

                /*** 1. Extracting the link from the request ***/
                TimeSpan extractDuration = extractedAt - fetchedAt;

                // Reset the filter dictionary that tracks urls coming from the same page.
                linkFilter.resetDictionary();
                int acceptedLinkCount = 0;
                foreach (LinkItem pageLink in pageLinks)
                {
                    // Filter one link at a time; only crawlable links come back.
                    List<String> candidate = new List<String> { pageLink.getLink() };
                    List<String> accepted = linkFilter.filterLinks(candidate);

                    if (accepted.Count == 0)
                    {
                        continue;
                    }

                    acceptedLinkCount++;
                    Url frontierUrl = new Url(accepted[0], hashUrl(accepted[0]), ranker.rankUrl(resource, pageLink),
                                              pageLink.getDomainUrl(), hashUrl(pageLink.getDomainUrl()));
                    deployLinksToFrontier(frontierUrl);
                    RuntimeStatistics.addToFeedUrls(1);
                }

                DateTime rankedAt = DateTime.Now;

                /*** 2. Ranking and deployment to the frontier ***/
                TimeSpan rankDuration = rankedAt - extractedAt;

                // Assign the url to every category it belongs to.
                List<Result> classifications = categorizer.classifyContent(resource.getResourceContent(),
                                                                          resource.getResourceUrl());
                if (classifications.Count != 0)
                {
                    RuntimeStatistics.addToCrawledUrls(1);
                }

                DateTime classifiedAt = DateTime.Now;

                /*** 3. Classification of the current request ***/
                TimeSpan classifyDuration = classifiedAt - rankedAt;

                foreach (Result classification in classifications)
                {
                    Result stored = new Result("0", classification.getUrl(), classification.getCategoryID(),
                                               resource.getRankOfUrl(), classification.getTrustMeter());
                    deployResourceToStorage(stored);
                }

                DateTime finishedAt = DateTime.Now;

                /*** 4. deployment to the database (result) ***/
                TimeSpan deployDuration = finishedAt - classifiedAt;

                /*** 5. Total processing time ***/
                TimeSpan totalDuration = finishedAt - startedAt;
            }
        }
コード例 #54
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
        /// <summary>
        /// Appends a link for a mention or, when the entity has a list slug, for a list.
        /// </summary>
        /// <param name="entity">Mention entity; Value, Start and ListSlug are read.</param>
        /// <param name="text">Original text; the at character is taken from it at entity.Start.</param>
        /// <param name="builder">Builder that receives the generated markup.</param>
        public void LinkToMentionAndList(Extractor.Entity entity, string text, StringBuilder builder)
        {
            string target = entity.Value;

            // Take the at char from the original text because it could be a full-width char.
            string atSign = text.Substring(entity.Start, 1);

            bool isList = entity.ListSlug != null;
            if (isList)
            {
                target += entity.ListSlug;
            }

            // Entries are added in the order class, href.
            IDictionary<string, string> linkAttributes = new Dictionary<string, string>
            {
                { "class", isList ? ListClass : UsernameClass },
                { "href", (isList ? ListUrlBase : UsernameUrlBase) + target }
            };

            LinkToTextWithSymbol(entity, atSign, target, linkAttributes, builder);
        }
コード例 #55
0
        // Verifies the source text generated for a class with static members: first the
        // extracted interface, then the wrapper implementation. Both comparisons are made
        // after RemoveWhitespace(), so layout differences do not matter.
        public void ExtractStaticMembers_ClassWithStatics_ExtractsStatics()
        {
            const string interfaceName = "Custom.Namespace.IStaticMyClass";
            const string implementationName = "Custom.Namespace.SStaticMyClass";

            string expectedInterface = @"
            namespace Custom.Namespace
            {
            public interface IStaticMyClass
            {
            System.Int32 DoSomething();
            System.Int32 GetProperty { get; }
            System.Int32 GetSetProperty { get; set; }
            System.Int32 SetProperty { set; }
            }
            }";

            string expectedImplementation = @"
            namespace Custom.Namespace
            {
            public class SStaticMyClass : Custom.Namespace.IStaticMyClass
            {
            private readonly Core.Fixtures.Fakes.ClassWithStatics instance;

            public System.Int32 DoSomething()
            {
            return Core.Fixtures.Fakes.ClassWithStatics.DoSomething();
            }

            public System.Int32 GetProperty
            {
            get
            {
                return Core.Fixtures.Fakes.ClassWithStatics.GetProperty;
            }
            }

            public System.Int32 GetSetProperty
            {
            get
            {
                return Core.Fixtures.Fakes.ClassWithStatics.GetSetProperty;
            }
            set
            {
                Core.Fixtures.Fakes.ClassWithStatics.GetSetProperty = value;
            }
            }

            public System.Int32 SetProperty
            {
            set
            {
                Core.Fixtures.Fakes.ClassWithStatics.SetProperty = value;
            }
            }
            }
            }";

            // Generate both artefacts up front and dump them before asserting.
            IExtractor extractor = new Extractor(typeof(ClassWithStatics));
            var generatedInterface = extractor.ExtractInterfaceForStaticMembers(interfaceName);
            var generatedImplementation = extractor.ExtractImplementationForStaticMembers(interfaceName, implementationName);
            write_output(generatedInterface, generatedImplementation);

            Assert.That(
                generatedInterface.RemoveWhitespace(),
                Is.EqualTo(expectedInterface.RemoveWhitespace()),
                "failed on interface creation");

            Assert.That(
                generatedImplementation.RemoveWhitespace(),
                Is.EqualTo(expectedImplementation.RemoveWhitespace()),
                "failed on implementation creation");
        }
コード例 #56
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
        /// <summary>
        /// Builds the link text and attributes for a URL entity and delegates the
        /// rendering to <c>LinkToText</c>.
        /// </summary>
        /// <remarks>
        /// When the entity has both a display URL and an expanded URL, the link text is built so
        /// that the display URL is shown while the expanded URL is what gets copied (see the long
        /// comment in the body). All substring matching is ordinal: the offsets passed to
        /// <c>Substring</c> assume the match is exactly as long as the search string, which a
        /// culture-sensitive search does not guarantee.
        /// </remarks>
        /// <param name="entity">URL entity; its <c>Value</c>, <c>DisplayURL</c> and <c>ExpandedURL</c> are read.</param>
        /// <param name="text">Text the entity was found in. Not used by this method.</param>
        /// <param name="builder">Output builder, passed through to <c>LinkToText</c>.</param>
        public void LinkToURL(Extractor.Entity entity, string text, StringBuilder builder)
        {
            string url = entity.Value;
            string linkText = EscapeHTML(url);

            if (entity.DisplayURL != null && entity.ExpandedURL != null)
            {
                // Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
                // should contain the full original URL (expanded_url), not the display URL.
                //
                // Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
                // font-size:0 to hide those parts that should not be displayed (because they are not part of display_url).
                // Elements with font-size:0 get copied even though they are not visible.
                // Note that display:none doesn't work here. Elements with display:none don't get copied.
                //
                // Additionally, we want to *display* ellipses, but we don't want them copied.  To make this happen we
                // wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
                // everything with the tco-ellipsis class.
                //
                // As an example: The user tweets "hi http://longdomainname.com/foo"
                // This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
                // This will get rendered as:
                // <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
                //   …
                //   <!-- There's a chance the onCopy event handler might not fire. In case that happens,
                //        we include an &nbsp; here so that the … doesn't bump up against the URL and ruin it.
                //        The &nbsp; is inside the tco-ellipsis span so that when the onCopy handler *does*
                //        fire, it doesn't get copied.  Otherwise the copied text would have two spaces in a row,
                //        e.g. "hi  http://longdomainname.com/foo". -->
                //   <span style='font-size:0'>&nbsp;</span>
                // </span>
                // <span style='font-size:0'>  <!-- This stuff should get copied but not displayed -->
                //   http://longdomai
                // </span>
                // <span class='js-display-url'> <!-- This stuff should get displayed *and* copied -->
                //   nname.com/foo
                // </span>
                // <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
                //   <span style='font-size:0'>&nbsp;</span>
                //   …
                // </span>
                //
                // Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1"
                // For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
                // For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
                string displayURLSansEllipses = entity.DisplayURL.Replace("…", "");

                // Ordinal search: the index and the search string's length are used as raw
                // character offsets into ExpandedURL just below.
                int displayURLIndexInExpandedURL = entity.ExpandedURL.IndexOf(displayURLSansEllipses, System.StringComparison.Ordinal);
                if (displayURLIndexInExpandedURL != -1)
                {
                    string beforeDisplayURL = entity.ExpandedURL.Substring(0, displayURLIndexInExpandedURL);
                    string afterDisplayURL = entity.ExpandedURL.Substring(displayURLIndexInExpandedURL + displayURLSansEllipses.Length);
                    string precedingEllipsis = entity.DisplayURL.StartsWith("…", System.StringComparison.Ordinal) ? "…" : "";
                    string followingEllipsis = entity.DisplayURL.EndsWith("…", System.StringComparison.Ordinal) ? "…" : "";
                    string invisibleSpan = "<span " + InvisibleTagAttrs + ">";

                    StringBuilder sb = new StringBuilder("<span class='tco-ellipsis'>");
                    sb.Append(precedingEllipsis);
                    sb.Append(invisibleSpan).Append("&nbsp;</span></span>");
                    sb.Append(invisibleSpan).Append(EscapeHTML(beforeDisplayURL)).Append("</span>");
                    sb.Append("<span class='js-display-url'>").Append(EscapeHTML(displayURLSansEllipses)).Append("</span>");
                    sb.Append(invisibleSpan).Append(EscapeHTML(afterDisplayURL)).Append("</span>");
                    sb.Append("<span class='tco-ellipsis'>").Append(invisibleSpan).Append("&nbsp;</span>").Append(followingEllipsis).Append("</span>");

                    linkText = sb.ToString();
                }
                else
                {
                    // The display URL is not contained in the expanded URL: show it as is.
                    linkText = entity.DisplayURL;
                }
            }

            IDictionary<string, string> attrs = new Dictionary<string, string>();
            attrs["href"] = url;

            // Optional attributes are only emitted when they carry a non-blank value.
            if (!string.IsNullOrWhiteSpace(entity.DisplayURL) && !string.IsNullOrWhiteSpace(entity.ExpandedURL))
            {
                attrs["title"] = entity.ExpandedURL;
            }

            if (!string.IsNullOrWhiteSpace(UrlClass))
            {
                attrs["class"] = UrlClass;
            }

            if (!string.IsNullOrWhiteSpace(UrlTarget))
            {
                attrs["target"] = UrlTarget;
            }

            LinkToText(entity, linkText, attrs, builder);
        }
コード例 #57
0
ファイル: SentenceGetter.cs プロジェクト: 89sos98/iveely
        /// <summary>
        /// Scans every word array in <paramref name="words"/> and collects distinct strings built by
        /// concatenating the <c>sWord</c> of consecutive words, selected by comparing each word's type
        /// string (from <c>Utility.GetPOSString</c>) against the collections on <paramref name="extractor"/>.
        /// </summary>
        /// <param name="words">Word arrays to scan. The first and the last element of each array are never visited.</param>
        /// <param name="extractor">Supplies the <c>Signs</c>, <c>FrontAllowSigns</c>, <c>NextContinueSigns</c> and <c>NextBanSigns</c> collections consulted below.</param>
        /// <returns>The distinct collected strings as an array.</returns>
        private string[] GetValuesByRules(List<WordResult[]> words, Extractor extractor)
        {
            HashSet<string> bodys = new HashSet<string>();
            // The three variables below are declared outside both loops, so their state
            // carries over from one word array to the next.
            string answer = string.Empty;      // candidate string currently being assembled
            bool shoudContine = false;         // true while a multi-word candidate is still open
            string lastType = "no-type";       // type of the last word that reached the bottom of the loop body

            for (int i = 0; i < words.Count; i++)
            {
                // Index 0 and the last index of each array are skipped.
                for (int j = 1; j < words[i].Length - 1; j++)
                {
                    string type = Utility.GetPOSString(words[i][j].nPOS).Trim();
                    // A word is taken when its type is in Signs and the previous type is in
                    // FrontAllowSigns, or unconditionally while a candidate is still open.
                    if ((extractor.Signs.Contains(type) && extractor.FrontAllowSigns.Contains(lastType)) || shoudContine)
                    {
                        // Look ahead only when the next word is itself inside the visited range.
                        if (j < words[i].Length - 2)
                        {
                            // NOTE(review): unlike `type`, this value is not trimmed - confirm
                            // GetPOSString never returns padding that would defeat the lookups below.
                            string nextType = Utility.GetPOSString(words[i][j + 1].nPOS);

                            if (extractor.NextContinueSigns.Contains(nextType))
                            {
                                // The next word continues the candidate: append and keep it open.
                                // lastType is only updated when the candidate is first opened.
                                if (!shoudContine)
                                {
                                    lastType = type;
                                }
                                answer += words[i][j].sWord;
                                shoudContine = true;
                                continue;
                            }
                            if (!extractor.NextBanSigns.Contains(nextType))
                            {
                                // The next word neither continues nor bans: append and try to store.
                                answer += words[i][j].sWord;
                                if (answer != string.Empty && !bodys.Contains(answer))
                                {
                                    bodys.Add(answer);
                                    answer = string.Empty;
                                    shoudContine = false;
                                    continue;
                                }
                                // NOTE(review): when the candidate was already stored (or is empty),
                                // control falls through to the code after this look-ahead block,
                                // which appends the same word a second time - confirm this is intended.
                            }
                            else
                            {
                                // The next word's type is banned: the current word is not appended;
                                // whatever was assembled so far is stored if it is new.
                                if (answer != string.Empty && !bodys.Contains(answer))
                                {
                                    bodys.Add(answer);
                                    answer = string.Empty;
                                    shoudContine = false;
                                }
                                continue;
                            }
                        }
                        // Reached for the last visited word of the array, or via the
                        // fall-through noted above: close the candidate, append and store.
                        shoudContine = false;
                        answer += words[i][j].sWord;
                        if (answer != string.Empty && !bodys.Contains(answer))
                        {
                            bodys.Add(answer);
                            answer = string.Empty;
                            continue;
                        }
                    }
                    // No `continue` was taken: store any pending candidate, then record this
                    // word's type for the next iteration's FrontAllowSigns check.
                    if (answer != string.Empty && !bodys.Contains(answer))
                    {
                        bodys.Add(answer);
                        answer = string.Empty;
                    }

                    shoudContine = false;
                    lastType = type;

                }
            }
            return bodys.ToArray();
        }
コード例 #58
0
ファイル: Autolink.cs プロジェクト: dghry/twitter-text-cs
        /// <summary>
        /// Builds the link attributes for a hashtag entity and delegates the rendering
        /// to <c>LinkToTextWithSymbol</c>.
        /// </summary>
        /// <param name="entity">Hashtag entity; its <c>Start</c> and <c>Value</c> are read.</param>
        /// <param name="text">Text the entity was found in; supplies the hash character and is tested against <c>Regex.RTL_CHARACTERS</c>.</param>
        /// <param name="builder">Output builder, passed through to <c>LinkToTextWithSymbol</c>.</param>
        public void LinkToHashtag(Extractor.Entity entity, string text, StringBuilder builder)
        {
            // Take the hash sign exactly as it appears in the text: it may be a full-width character.
            string symbol = text.Substring(entity.Start, 1);
            string tag = entity.Value;

            IDictionary<string, string> attrs = new Dictionary<string, string>();
            attrs.Add("href", HashtagUrlBase + tag);
            attrs.Add("title", "#" + tag);

            // The " rtl" marker is added when the RTL pattern matches anywhere in the whole text.
            attrs.Add("class", Regex.RTL_CHARACTERS.IsMatch(text) ? HashtagClass + " rtl" : HashtagClass);

            LinkToTextWithSymbol(entity, symbol, tag, attrs, builder);
        }