/// <summary>
/// Builds the processor's collaborators from the supplied initializer and keeps a
/// reference to the URL frontier.
/// </summary>
/// <param name="initializer">Supplies the category list, the filter constraints and the task id.</param>
/// <param name="frontier">URL queue stored on this instance.</param>
public HtmlPageCategorizationProcessor(Initializer initializer, Queue<Url> frontier)
{
    extractor = new Extractor();

    // The ranker is constructed from the categorizer, so the categorizer comes first.
    var categoryList = initializer.getCategoryList();
    categorizer = new Categorizer(categoryList);
    ranker = new Ranker(categorizer);

    var constraints = initializer.getContraints();
    filter = new Filter("http://", constraints);

    queueFronier = frontier;
    taskId = initializer.getTaskId();
}
/// <summary>
/// Verifies that the extractor returns the text of the embedded "sample.pdf" resource.
/// </summary>
public void Can_extract_text()
{
    ////Arrange
    // The resource stream is now disposed when the test finishes.
    using (var pdfStream = GetType().Assembly.GetManifestResourceStream(typeof (ExtractorTests), "sample.pdf"))
    {
        // GetManifestResourceStream returns null when the resource is not found; fail
        // with a clear message instead of an unrelated exception inside the extractor.
        Assert.That(pdfStream, Is.Not.Null, "embedded resource sample.pdf was not found");
        string result;

        ////Act
        using (var extractor = new Extractor())
        {
            result = extractor.ExtractToString(pdfStream);
        }

        ////Assert
        Assert.That(result.Trim(), Is.EqualTo("hello world"));
    }
}
/// <summary>
/// Runs the extractor over the spec input and compares the type and text of every
/// extracted result with the expected results, pairwise and in order.
/// </summary>
public void TestDateTimeExtractor()
{
    TestPreValidation();

    var reference = TestSpec.GetReferenceDateTime();
    var actualResults = Extractor.Extract(TestSpec.Input, reference);
    var expectedResults = TestSpec.CastResults <ExtractResult>();

    Assert.AreEqual(expectedResults.Count(), actualResults.Count, GetMessage(TestSpec));

    foreach (var pair in Enumerable.Zip(expectedResults, actualResults, Tuple.Create))
    {
        // Item1 is the expected result, Item2 the actual one.
        Assert.AreEqual(pair.Item1.Type, pair.Item2.Type, GetMessage(TestSpec));
        Assert.AreEqual(pair.Item1.Text, pair.Item2.Text, GetMessage(TestSpec));
    }
}
/// <summary>
/// HTTP GET entry point that authenticates an extractor with the caller's bearer token
/// and returns the subscriptions it loads.
/// </summary>
/// <param name="req">Incoming request; its Authorization header carries the token.</param>
/// <param name="log">Function logger.</param>
/// <returns>
/// 401 with "Invalid Token" when the Authorization header is missing or empty; otherwise
/// 200 with the extractor's subscriptions.
/// </returns>
public static async Task <IActionResult> Run(
    [HttpTrigger(AuthorizationLevel.Function, "get", Route = null)] HttpRequest req,
    ILogger log)
{
    log.LogInformation("SubscriptionsFunction started");

    // A missing header used to make Single() throw (surfacing as HTTP 500); it is now
    // handled exactly like an empty header.
    if (!req.Headers.TryGetValue("Authorization", out var authHeader) ||
        string.IsNullOrEmpty(authHeader.ToString()))
    {
        return(new UnauthorizedObjectResult("Invalid Token")); // HTTP 401 Unauthorized
    }

    // Strip only a leading "Bearer" scheme; Replace() would also remove the same
    // characters if they happened to occur inside the token itself.
    var token = authHeader.ToString().Trim();
    if (token.StartsWith("Bearer", System.StringComparison.Ordinal))
    {
        token = token.Substring("Bearer".Length).Trim();
    }

    // Work on a local instance so that concurrent invocations cannot observe each
    // other's extractor through the shared static member between Authenticate and Load.
    var extractor = new Extractor();
    Extractor = extractor; // still published, in case other code reads the static member
    extractor.Authenticate(token);
    await extractor.Load();

    return(new OkObjectResult(extractor.Subscriptions));
}
/// <summary>
/// Extracts candidates from the query, parses each one and converts the parse results
/// into model results.
/// </summary>
/// <param name="query">Text to analyse.</param>
/// <returns>One <see cref="ModelResult"/> per extracted candidate, in extraction order.</returns>
public List <ModelResult> Parse(string query)
{
    // Phase 1: parse every extracted candidate.
    var parsedNumbers = new List <ParseResult>();
    foreach (var extracted in Extractor.Extract(query))
    {
        var parsed = Parser.Parse(extracted);
        parsedNumbers.Add(parsed);
    }

    // Phase 2: project the parse results; End is the inclusive index of the last character.
    var models = new List <ModelResult>();
    foreach (var parsed in parsedNumbers)
    {
        var model = new ModelResult
        {
            Start      = parsed.Start.Value,
            End        = parsed.Start.Value + parsed.Length.Value - 1,
            Resolution = new SortedDictionary <string, object> {
                { "value", parsed.ResolutionStr }
            },
            Text     = parsed.Text,
            TypeName = ModelTypeName
        };
        models.Add(model);
    }

    return(models);
}
/// <summary>
/// Initialises the form and wires the button: a click disables the form, constructs an
/// <c>Extractor</c> for the URL typed in the text box, reports the outcome in a message
/// box and re-enables the form.
/// </summary>
public Form1()
{
    InitializeComponent();

    button1.Click += (sender, args) =>
    {
        try
        {
            Enabled = false;

            var target = new Uri(textBox1.Text);
            var extraction = new Extractor(target);

            MessageBox.Show("Terminó");
            Enabled = true;
        }
        catch (Exception)
        {
            // Any failure (including an invalid URL) re-enables the form before reporting.
            Enabled = true;
            MessageBox.Show("Ocurrió un error, intente de nuevo");
        }
    };
}
/// <summary>
/// Extracts the model's data from the page, passes every item through each registered
/// data handler and stores the items on the page as a result item.
/// </summary>
/// <param name="page">Page data.</param>
protected override void Handle(Page page)
{
    var datas = Extractor.Extract(page, Model);
    if (datas == null)
    {
        return;
    }

    // Materialise once. The previous code called Count() and ElementAt(i) on the raw
    // sequence inside the loops, which walks it again on every call and, for a lazily
    // evaluated sequence, repeats the extraction each time.
    var items = datas.ToList();
    if (items.Count == 0)
    {
        return;
    }

    foreach (var handler in _dataHandlers)
    {
        for (int i = 0; i < items.Count; ++i)
        {
            dynamic data = items[i];
            handler.Handle(ref data, page);
        }
    }

    page.AddResultItem(Model.Identity, new Tuple <IModel, IEnumerable <dynamic> >(Model, items));
}
/// <summary>
/// Renders every page of the document to a TIF canvas backed by an in-memory stream and
/// then writes the buffered bytes to <paramref name="outFile"/>.
/// </summary>
/// <param name="filename">Name used in the progress and error messages.</param>
/// <param name="outFile">Already-open writer that receives the rendered bytes.</param>
/// <param name="item">Document to open, render and close.</param>
private void ProcessFile(string filename, BinaryWriter outFile, Extractor item)
{
    m_stderr.WriteLine("Processing " + filename);

    try
    {
        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

        CustomStream buffer = new CustomStream(new MemoryStream());
        Hyland.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(buffer, isys_docfilters.IGR_DEVICE_IMAGE_TIF, "");

        try
        {
            int pageIndex = 0;
            while (pageIndex < item.GetPageCount())
            {
                Hyland.DocumentFilters.Page current = item.GetPage(pageIndex);
                try
                {
                    canvas.RenderPage(current);
                }
                finally
                {
                    current.Close();
                }

                pageIndex++;
            }
        }
        finally
        {
            canvas.Close();

            // Write in-memory stream contents to existing open file (e.g. stdout)
            long written = buffer.writeTo(outFile);
            m_stderr.WriteLine(written + " bytes written to outFile");
        }
    }
    catch (Exception error)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + error.ToString());
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Extracts and parses the spec input, checks the number of results and, for results
/// whose value is a dictionary, compares the "values" entries with the expected ones.
/// </summary>
public void TestDateTimeMergedParser()
{
    if (TestUtils.EvaluateSpec(TestSpec, out string skipReason))
    {
        Assert.Inconclusive(skipReason);
    }

    if (Debugger.IsAttached && TestSpec.Debug)
    {
        Debugger.Break();
    }

    var reference = TestSpec.GetReferenceDateTime();
    var extracted = Extractor.Extract(TestSpec.Input, reference);
    var actualResults = extracted.Select(o => DateTimeParser.Parse(o, reference)).ToArray();
    var expectedResults = TestSpec.CastResults <DateTimeParseResult>();

    Assert.AreEqual(expectedResults.Count(), actualResults.Count(), GetMessage(TestSpec));

    foreach (var pair in Enumerable.Zip(expectedResults, actualResults, Tuple.Create))
    {
        var expected = pair.Item1;
        var actual = pair.Item2;

        // Only dictionary-valued results carry a "values" list to compare.
        var values = actual.Value as IDictionary <string, object>;
        if (values == null)
        {
            continue;
        }

        var actualValues = values["values"] as IList <Dictionary <string, string> >;
        var expectedObj = JsonConvert.DeserializeObject <IDictionary <string, IList <Dictionary <string, string> > > >(expected.Value.ToString());
        var expectedValues = expectedObj["values"];

        foreach (var valuePair in Enumerable.Zip(expectedValues, actualValues, Tuple.Create))
        {
            CollectionAssert.AreEqual(valuePair.Item1, valuePair.Item2, GetMessage(TestSpec));
        }
    }
}
/// <summary>
/// Extracts and parses the spec input, then compares text, type, start and length of
/// each result with the expectation; dictionary-valued results additionally have their
/// value sets compared entry by entry.
/// </summary>
public void TestDateTimeMergedParser()
{
    TestPreValidation();

    var reference = TestSpec.GetReferenceDateTime();
    var extracted = Extractor.Extract(TestSpec.Input, reference);
    var actualResults = extracted.Select(o => DateTimeParser.Parse(o, reference)).ToArray();
    var expectedResults = TestSpec.CastResults <DateTimeParseResult>();

    Assert.AreEqual(expectedResults.Count(), actualResults.Length, GetMessage(TestSpec));

    foreach (var pair in Enumerable.Zip(expectedResults, actualResults, Tuple.Create))
    {
        var expected = pair.Item1;
        var actual = pair.Item2;

        Assert.AreEqual(expected.Text, actual.Text, GetMessage(TestSpec));
        Assert.AreEqual(expected.Type, actual.Type, GetMessage(TestSpec));
        Assert.AreEqual(expected.Start, actual.Start, GetMessage(TestSpec));
        Assert.AreEqual(expected.Length, actual.Length, GetMessage(TestSpec));

        var values = actual.Value as IDictionary <string, object>;
        if (values == null)
        {
            continue;
        }

        // Actual ValueSet types should not be modified as that's considered a breaking API change
        var actualValues = values[ResolutionKey.ValueSet] as IList <Dictionary <string, string> >;
        var expectedObj = JsonConvert.DeserializeObject <IDictionary <string, IList <Dictionary <string, string> > > >(
            expected.Value.ToString());
        var expectedValues = expectedObj[ResolutionKey.ValueSet];

        foreach (var valuePair in Enumerable.Zip(expectedValues, actualValues, Tuple.Create))
        {
            Assert.AreEqual(valuePair.Item1.Count, valuePair.Item2.Count, GetMessage(TestSpec));
            CollectionAssert.AreEqual(valuePair.Item1, valuePair.Item2, GetMessage(TestSpec));
        }
    }
}
/// <summary>
/// Extracts every entry of the test directory file into a sub-folder of the extract
/// path, printing a running read/decompress count as progress is reported.
/// </summary>
public async Task TestExtractorExtractAllAsync()
{
    var file = new DirectoryFile(TestFile);

    var targetDir = Path.Combine(ExtractPath, Path.GetFileNameWithoutExtension(file.FilePath) + Path.DirectorySeparatorChar);
    Directory.CreateDirectory(targetDir);

    int readTotal = file.EntryBlocks.Sum(b => b.Entries.Count);
    int decompressTotal = file.EntryBlocks.SelectMany(b => b.Entries.Where(e => e.IsCompressed)).Count();
    int readSoFar = 0;
    int decompressedSoFar = 0;

    // Progress reports are counted under a lock.
    object gate = new object();
    Progress <EntryOperation> progress = new Progress <EntryOperation>(operation =>
    {
        lock (gate)
        {
            if (operation.OperationPerformed == EntryOperation.ProcessType.Read)
            {
                readSoFar++;
                Console.WriteLine($"Read: {readSoFar}/{readTotal}");
            }
            else if (operation.OperationPerformed == EntryOperation.ProcessType.Decompress)
            {
                decompressedSoFar++;
                Console.WriteLine($"Decompressed: {decompressedSoFar}/{decompressTotal}");
            }
        }
    });

    using var extractor = new Extractor(file);
    await extractor.ExtractAllAsync(targetDir, progress);
}
/// <summary>
/// Ensures the entity has a label: reuses the label recorded for an equal entity if one
/// exists, otherwise assigns a new label, defines it, records it and schedules
/// extraction of the entity's contents.
/// </summary>
/// <param name="e">Entity to label.</param>
/// <returns>The same entity that was passed in.</returns>
internal T Populate <T>(T e) where T : IExtractedEntity
{
    // Already populated
    if (e.Label.Valid)
    {
        return(e);
    }

    // It exists already
    if (ids.TryGetValue(e, out var known))
    {
        e.Label = known;
        return(e);
    }

    e.Label = GetNewLabel();
    DefineLabel(e);
    ids.Add(e, e.Label);

    // Contents are extracted via PopulateLater rather than inline.
    PopulateLater(() =>
    {
        foreach (var content in e.Contents)
        {
            content.Extract(this);
        }
    });

#if DEBUG_LABELS
    {
        using var writer = new EscapingTextWriter();
        e.WriteId(writer);
        var id = writer.ToString();
        if (debugLabels.TryGetValue(id, out var previousEntity))
        {
            Extractor.Message(new Message("Duplicate trap ID", id, null, severity: Util.Logging.Severity.Warning));
        }
        else
        {
            debugLabels.Add(id, e);
        }
    }
#endif

    return(e);
}
/// <summary>
/// Loads the procedures of <paramref name="db"/> and, for each one, its text plus
/// (when requested) its input parameters and output record sets.
/// </summary>
/// <param name="db">Database whose procedures are read.</param>
/// <param name="dataToExtract">
/// Requested object types; <c>All</c> enables both optional parts.
/// </param>
private void ReadProcedures(Database db, List <DBObjectType> dataToExtract)
{
    Extractor.GetProcedures(db);

    foreach (Procedure p in db.Procedures)
    {
        // Snapshot the delegate before testing it: checking the member and then invoking
        // it can throw if the last subscriber detaches between the two reads.
        var beforeExtract = BeforeExtractData;
        if (beforeExtract != null)
        {
            beforeExtract(DBObjectType.Procedures, p.Name);
        }

        Extractor.GetProcedureText(db, p);

        if (dataToExtract.Contains(DBObjectType.All) || dataToExtract.Contains(DBObjectType.ProcedureInputParameters))
        {
            Extractor.GetProcedureInputParameters(db, p);
        }

        if (dataToExtract.Contains(DBObjectType.All) || dataToExtract.Contains(DBObjectType.ProcedureOutputRecordSets))
        {
            Extractor.GetProcedureOutputRecordSets(db, p);
        }
    }
}
/// <summary>
/// Lets the user pick one or more files, shows how many were selected, creates an
/// extractor for them and lists them; shows a message when the dialog is not confirmed.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // OpenFileDialog is disposable; it used to be left undisposed on every click.
    using (OpenFileDialog openFileDialog1 = new OpenFileDialog())
    {
        openFileDialog1.Multiselect = true;

        DialogResult result = openFileDialog1.ShowDialog(); // Show the dialog.
        if (result != DialogResult.OK) // Test result.
        {
            MessageBox.Show("Debe seleccionar algun archivo PDF.");
            return;
        }

        List <string> filesList = new List <string>(openFileDialog1.FileNames);
        this.label1.Text = String.Format("Seleccionados: {0}", filesList.Count);
        this._extractor = new Extractor(filesList);
        this.ShowFilesInList(filesList);
    }
}
// Converted from a Java test (original line: @Test void shouldCloneExtractor()); the
// converter noted that the Java annotation has no direct .NET equivalent attribute.
// Checks that a cloned extractor holds its value independently of the original.
internal virtual void ShouldCloneExtractor()
{
    // GIVEN
    Extractors factory = new Extractors(';');
    Extractor <string> original = factory.String();
    Extractor <string> clone = original.Clone();

    // WHEN
    string first = "abc";
    char[] firstChars = first.ToCharArray();
    original.Extract(firstChars, 0, first.Length, false);
    assertEquals(first, original.Value());
    assertNull(clone.Value());

    // THEN
    string second = "def";
    char[] secondChars = second.ToCharArray();
    clone.Extract(secondChars, 0, second.Length, false);
    assertEquals(second, clone.Value());
    assertEquals(first, original.Value());
}
/// <summary>
/// Incorporates one partial result into the aggregated map.
/// </summary>
/// <remarks>
/// Null values are ignored. A non-null value with <paramref name="isFinal"/> set to
/// <b>false</b> is rejected. A final value that is a <see cref="DictionaryEntry"/> has
/// its value passed through the extractor and is added to the map under the entry's
/// key; final values of any other type are ignored.
/// </remarks>
/// <param name="o">
/// The value to incorporate into the aggregated result.
/// </param>
/// <param name="isFinal">
/// <b>true</b> to indicate that the given object is a partial result returned by a
/// parallel aggregator.
/// </param>
/// <exception cref="InvalidOperationException">
/// <paramref name="o"/> is not null and <paramref name="isFinal"/> is <b>false</b>.
/// </exception>
protected override void Process(object o, bool isFinal)
{
    if (o == null)
    {
        return;
    }

    if (!isFinal)
    {
        // should not be called with isFinal == false
        // that would mean multiple aggregators would be running on the client,
        // but shouldn't happen.
        throw new InvalidOperationException();
    }

    if (!(o is DictionaryEntry))
    {
        return;
    }

    DictionaryEntry entry = (DictionaryEntry) o;
    var map = EnsureMap();
    map.Add(entry.Key, Extractor.Extract(entry.Value));
}
/// <summary>
/// For every schema, reads the index definitions through the extractor and attaches an
/// index to each known table whose indexed columns can all be resolved.
/// </summary>
/// <param name="extractor">Source of the index definitions.</param>
/// <param name="database">Model whose tables receive the indices.</param>
private static void AddIndices(Extractor extractor, Database database)
{
    foreach (var schema in database.Schemas)
    {
        // One key object per schema; only its table name changes between lookups.
        var lookupKey = new TableID(schema.DatabaseName, schema.SchemaName, null);

        foreach (var indexInfo in extractor.Database.GetIndices(schema.SchemaName))
        {
            lookupKey.TableName = indexInfo.TableName;
            if (!database.tableDictionary.ContainsKey(lookupKey))
            {
                continue;
            }

            var table = database.tableDictionary[lookupKey];

            // NOTE(review): the index is disposed at the end of this using block even when
            // it has just been added to the table - confirm that this is intended.
            using (var ix = new Index(table, indexInfo.IndexName))
            {
                var orderedNames = indexInfo.IndexOrderdColumnNames
                                   .OrderBy(entry => entry.Key)
                                   .Select(entry => entry.Value);

                var allColumnsFound = true;
                foreach (var name in orderedNames)
                {
                    var column = table.Columns.FirstOrDefault(col => col.ColumnName == name);
                    if (column == null)
                    {
                        // Stop at the first unknown column; the index is then not attached.
                        allColumnsFound = false;
                        break;
                    }

                    ix._columns.Add(column);
                }

                ix.IsUnique = indexInfo.IsUnique;

                if (allColumnsFound)
                {
                    table._indices.Add(ix);
                }
            }
        }
    }
}
/// <summary>
/// Initialises the main form: resolves the application folder, loads the
/// configuration (prompting with the configuration dialog until it can be saved or the
/// user chooses to close), then creates the extractor and the databases.
/// </summary>
public MainForm()
{
    InitializeComponent();

    // Take the folder of the running executable directly. Splitting the path on a
    // hard-coded "\osu! mp3 extractor.exe" yielded the full exe path whenever the file
    // had been renamed or its name differed in case.
    AppPath = System.IO.Path.GetDirectoryName(Application.ExecutablePath); //appPath
    Configs = new Configurations(AppPath);                                 //Initialize configurations

    if (!Configs.getConfigurations())
    {
        ConfigurationsForm configForm = new ConfigurationsForm();
        try
        {
            configForm.ShowDialog();
            if (configForm.ShouldClose)
            {
                Environment.Exit(0);
            }

            while (!Configs.updateConfigurations(configForm.Cfg))
            {
                // Read the configuration before disposing the dialog it belongs to.
                var rejectedCfg = configForm.Cfg;
                configForm.Dispose();
                configForm = new ConfigurationsForm(rejectedCfg);
                configForm.ShowDialog();
                if (configForm.ShouldClose)
                {
                    Environment.Exit(0);
                }
            }
        }
        finally
        {
            // The last dialog instance used to be left undisposed.
            configForm.Dispose();
        }
    }

    outputLabelData.Text = Configs.OutPath;
    osuLabelData.Text = Configs.OsuPath;
    extractor = new Extractor(this.progressBar, this.extractButton, this.optionsButton, this.cancelButton);
    Cdb = new CollectionDb();
    Odb = new OsuDb();
    updateComboBox(false);
}
/// <summary>
/// Tries to connect to the client that owns <paramref name="job"/> to find out whether
/// the task is still running.
/// </summary>
/// <param name="job">Job whose host name and listening port are contacted.</param>
/// <param name="attempts">Maximum number of connection attempts.</param>
/// <param name="unlockOnFail">
/// When true, the job's site/file is unlocked after the final attempt has failed.
/// </param>
/// <returns>True as soon as one attempt connects; false otherwise.</returns>
public bool tryClientConnect(Extractor job, int attempts, bool unlockOnFail)
{
    Client c = new Client();
    _report.addDebug("Attempting to connect to a client to verify a task is still running...");

    for (int i = 0; i < attempts; i++)
    {
        try
        {
            c.connect(job.HostName, job.ListeningPort);
            c.disconnect();
            _report.addDebug("The client appears to still be running!");
            return(true);
        }
        catch (Exception)
        {
            c.disconnect();

            // Retry only while attempts remain. The previous test (i < attempts) was
            // always true inside this loop, so the unlock branch below could never run
            // and the last failure still slept and announced "0 more times".
            if (i < attempts - 1)
            {
                _report.addDebug("The client did not respond on attempt number " + (i + 1) + ", will try " + (attempts - i - 1) + " more times");
                System.Threading.Thread.Sleep(1000);
                continue;
            }

            if (unlockOnFail)
            {
                try
                {
                    _report.addDebug("The client did not respond after " + attempts + " connection attempts. Unlocking the job");
                    _sqlDao.unlockSite(job.SiteCode, job.VistaFile);
                }
                catch (Exception)
                {
                    // this is probably ok - should get cleaned up eventually
                }
            }

            return(false);
        }
    }

    return(false);
}
/// <summary>
/// Reads or writes a run of bytes of the 16-byte value starting at
/// <paramref name="offset"/>.
/// </summary>
/// <param name="offset">Start position; must be in 0..15.</param>
/// <param name="length">
/// Requested byte count. NOTE(review): the original comparison widened any request
/// shorter than the remaining bytes to "all remaining bytes" and never shortened a
/// longer one. The widening is preserved so existing callers see the same results,
/// but it looks inverted - confirm the intended meaning.
/// </param>
/// <value>
/// Getter: the bytes from <paramref name="offset"/> to the end of the value, or null
/// when the offset is out of range. Setter: copies at most the remaining bytes (and at
/// most <c>value.Length</c>); does nothing when the offset is out of range.
/// </value>
public byte[] this[int offset, int length]
{
    get
    {
        // Negative offsets used to be accepted and read before the start of the buffer.
        if (offset < 0 || offset >= 16)
        {
            return(null);
        }

        // Never copy more than what is left in the buffer; a larger length used to
        // read past the end of the 16 bytes.
        length = 16 - offset;

        byte[] r = new byte[length];
        fixed(byte *pbyte = bytes)
        fixed(byte *rbyte = r)
        {
            Extractor.CopyBlock(rbyte, pbyte + offset, length);
        }

        return(r);
    }

    set
    {
        // Same range rule as the getter; out-of-range offsets are ignored.
        if (offset < 0 || offset >= 16)
        {
            return;
        }

        // Upper bound is the room left in the buffer, then the size of the source.
        length = 16 - offset;
        if (value.Length < length)
        {
            length = value.Length;
        }

        fixed(byte *rbyte = value)
        fixed(byte *pbyte = bytes)
        {
            Extractor.CopyBlock(pbyte, rbyte, offset, length);
        }
    }
}
/// <summary>
/// Renders every page of the document into a TIF file in the output folder, named
/// after the input file.
/// </summary>
/// <param name="filename">Input file name; also used in the progress and error messages.</param>
/// <param name="item">Document to open, render and close.</param>
private void ProcessFile(string filename, Extractor item)
{
    string baseName = System.IO.Path.GetFileNameWithoutExtension(filename);
    string destination = System.IO.Path.Combine(m_outputFolder, baseName + ".tif");

    m_stderr.WriteLine("Processing " + filename);

    try
    {
        item.Open(isys_docfilters.IGR_BODY_AND_META | isys_docfilters.IGR_FORMAT_IMAGE);

        Hyland.DocumentFilters.Canvas canvas = m_filters.MakeOutputCanvas(destination, isys_docfilters.IGR_DEVICE_IMAGE_TIF, "");
        try
        {
            int pageIndex = 0;
            while (pageIndex < item.GetPageCount())
            {
                Hyland.DocumentFilters.Page current = item.GetPage(pageIndex);
                try
                {
                    canvas.RenderPage(current);
                }
                finally
                {
                    current.Close();
                }

                pageIndex++;
            }
        }
        finally
        {
            canvas.Close();
        }
    }
    catch (Exception error)
    {
        m_stderr.WriteLine("Error Processing " + filename);
        m_stderr.WriteLine(" - " + error.ToString());
    }
    finally
    {
        item.Close();
    }
}
/// <summary>
/// Preprocesses the query, extracts candidates, parses each one against the reference
/// time and converts the parse results into model results.
/// </summary>
/// <param name="query">Text to analyse.</param>
/// <param name="refTime">Reference time handed to the parser.</param>
/// <returns>One <see cref="ModelResult"/> per extracted candidate, in extraction order.</returns>
public List <ModelResult> Parse(string query, System.DateTime refTime)
{
    // preprocess the query
    query = FormatUtility.Preprocess(query);

    // Phase 1: parse every extracted candidate.
    var parsedDateTimes = new List <DateTimeParseResult>();
    foreach (var extracted in Extractor.Extract(query))
    {
        var parsed = Parser.Parse(extracted, refTime);
        parsedDateTimes.Add(parsed);
    }

    // Phase 2: project the parse results; End is the inclusive index of the last character.
    var models = new List <ModelResult>();
    foreach (var parsed in parsedDateTimes)
    {
        var model = new ModelResult
        {
            Start      = parsed.Start.Value,
            End        = parsed.Start.Value + parsed.Length.Value - 1,
            TypeName   = parsed.Type,
            Resolution = parsed.Value as SortedDictionary <string, object>,
            Text       = parsed.Text
        };
        models.Add(model);
    }

    return(models);
}
/// <summary>
/// Extracts the model's data from the page's selectable content, passes every item
/// through each registered data handler and stores the items on the page.
/// </summary>
/// <param name="page">Page data.</param>
protected override void Handle(Page page)
{
    var datas = Extractor.Extract(page.Selectable(), Model);
    if (datas != null)
    {
        var items = datas.ToList();

        foreach (var handler in _dataHandlers)
        {
            var index = 0;
            while (index < items.Count)
            {
                dynamic data = items[index];
                handler.Handle(ref data, page);
                ++index;
            }
        }

        var resultItem = new Tuple <IModel, IList <dynamic> >(Model, items);
        page.AddResultItem(Model.Identity, resultItem);
    }
}
/// <summary>
/// Extracts metadata from five fixed URLs, one after another, and checks that every
/// result is non-null and that the third one has keywords.
/// </summary>
public async Task TestExtractMethod()
{
    var extractor = new Extractor();

    var homePage = new Uri("http://andrew.gubskiy.com/");
    var torfHome = new Uri("http://torf.tv/");
    var torfVideo = new Uri("http://torf.tv/video/IraSkladPortrait");
    var newsArticle = new Uri("http://www.c-sharpcorner.com/news/stratis-bitcoin-full-node-for-net-core-in-c-sharp-goes-live");
    var blogPost = new Uri("http://www.aaronstannard.com/the-coming-dotnet-reinassance/");

    var homePageMeta = await extractor.ExtractAsync(homePage);
    var torfHomeMeta = await extractor.ExtractAsync(torfHome);
    var torfVideoMeta = await extractor.ExtractAsync(torfVideo);
    var newsArticleMeta = await extractor.ExtractAsync(newsArticle);
    var blogPostMeta = await extractor.ExtractAsync(blogPost);

    Assert.NotNull(homePageMeta);
    Assert.NotNull(torfHomeMeta);
    Assert.NotNull(torfVideoMeta);
    Assert.NotNull(newsArticleMeta);
    Assert.NotNull(blogPostMeta);

    Assert.NotEmpty(torfVideoMeta.Keywords);
}
/// <summary>
/// Runs the whole pipeline in a fixed order: Mongo to MS SQL transfer, archive
/// extraction, PDF report, JSON reports, Excel report and finally the XML import.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Transfer from MongoDB to MS SQL
    DataTransferer.TransferDataFromMongoToMsSql();

    // Extract from ZIP
    Extractor archiveExtractor = new Extractor("..\\..\\");
    archiveExtractor.ExtractFromArchive("TravelInfo.zip");

    // PDF reporter
    PDFReporterGenerator.CreatePDF();

    // JSON reporter
    Reporter jsonReporter = new Reporter();
    jsonReporter.MakeReports();

    // Excel reporter
    var excelReporter = new ExcelReporter();
    excelReporter.Report();

    // XML import
    var xmlInserter = new XMLDataInserter();
    xmlInserter.ParseXML();
}
/// <summary>
/// Fetches the rule's URL, runs the rule's expression against the response and returns
/// the extraction result that has the most metas.
/// </summary>
/// <param name="rule">Rule under test (URL, method, JS flag, expression, feature).</param>
/// <param name="debug">When false, the result is passed through <c>ClearContent</c> before it is returned.</param>
/// <returns>
/// The selected extraction result, or an empty object when there is no response data
/// or the extraction yields no result.
/// </returns>
public object TestRule(RuleModel rule, [FromUri] bool debug = false)
{
    var request = new Request(rule.Url);
    request.Method = rule.Method;
    request.RunJS = (rule.RunJS == Status.ON);

    var response = Crawler.Request(request);
    if (response == null || response.Data == null)
    {
        return(new { });
    }

    var content = response.Data.ToString();
    var block = RuiJiBlockParser.ParserBlock(rule.RuiJiExpression);

    var r = new ExtractRequest();
    r.Content = content;
    r.Blocks = new List <ExtractFeatureBlock> {
        new ExtractFeatureBlock(block, rule.Feature)
    };

    var results = Extractor.Extract(r);
    if (results == null)
    {
        return(new { });
    }

    // Metas may be null (the code below checks for it), so treat that as zero metas
    // instead of failing inside the sort.
    var result = results.OrderByDescending(m => m.Metas == null ? 0 : m.Metas.Count).FirstOrDefault();

    // FirstOrDefault yields null for an empty result set; previously this fell through
    // to result.Paging and threw a NullReferenceException.
    if (result == null)
    {
        return(new { });
    }

    if (result.Paging != null && result.Paging.Count > 0 && result.Metas != null && result.Metas.ContainsKey("content"))
    {
        result = PagingExtractor.MergeContent(new Uri(rule.Url), result, block);
    }

    if (!debug)
    {
        CrawlTaskFunc.ClearContent(result);
    }

    return(result);
}
/// <summary>
/// Preprocesses the query, extracts and parses sequences, and converts whatever was
/// parsed into model results carrying a value and a score resolution.
/// </summary>
/// <param name="query">Text to analyse.</param>
/// <returns>
/// One <see cref="ModelResult"/> per successfully parsed sequence; results parsed
/// before an extraction/parse exception are still returned.
/// </returns>
public override List <ModelResult> Parse(string query)
{
    var parsedSequences = new List <ParseResult>();

    // Preprocess the query
    query = QueryProcessor.Preprocess(query);

    try
    {
        foreach (var extracted in Extractor.Extract(query))
        {
            var parsed = Parser.Parse(extracted);
            parsedSequences.Add(parsed);
        }
    }
    catch (Exception)
    {
        // Nothing to do. Exceptions in parse should not break users of recognizers.
        // No result.
    }

    var models = new List <ModelResult>();
    foreach (var parsed in parsedSequences)
    {
        var model = new ModelResult
        {
            Start      = parsed.Start.Value,
            End        = parsed.Start.Value + parsed.Length.Value - 1,
            Resolution = new SortedDictionary <string, object>
            {
                { ResolutionKey.Value, parsed.ResolutionStr },
                { ResolutionKey.Score, parsed.Value.ToString() },
            },
            Text     = parsed.Text,
            TypeName = ModelTypeName,
        };
        models.Add(model);
    }

    return(models);
}
/// <summary>
/// Extracts a BZip2 file contained in <paramref name="fileEntry"/>.
/// </summary>
/// <param name="fileEntry">FileEntry to extract.</param>
/// <param name="options">Extraction options; <c>ExtractSelfOnFail</c> decides whether the entry itself is returned when the stream cannot be opened.</param>
/// <param name="governor">Resource governor consulted with the stream length and passed on to nested extraction.</param>
/// <returns>The files extracted from the decompressed content.</returns>
/// <exception cref="OverflowException">The decompressed entry is detected as a quine.</exception>
public async IAsyncEnumerable <FileEntry> ExtractAsync(FileEntry fileEntry, ExtractorOptions options, ResourceGovernor governor)
{
    BZip2Stream? bzip2Stream = null;
    try
    {
        bzip2Stream = new BZip2Stream(fileEntry.Content, SharpCompress.Compressors.CompressionMode.Decompress, false);
        governor.CheckResourceGovernor(bzip2Stream.Length);
    }
    catch (Exception e)
    {
        // NOTE(review): a failure after the stream was created (e.g. in the governor
        // check) is only logged and extraction continues - behaviour kept as it was.
        Logger.Debug(Extractor.DEBUG_STRING, ArchiveFileType.BZIP2, fileEntry.FullPath, string.Empty, e.GetType());
    }

    if (bzip2Stream == null)
    {
        if (options.ExtractSelfOnFail)
        {
            yield return(fileEntry);
        }

        yield break;
    }

    // try/finally guarantees disposal when nested extraction throws or when the
    // consumer stops enumerating early; the stream used to leak in those cases.
    try
    {
        var newFilename = Path.GetFileNameWithoutExtension(fileEntry.Name);
        var newFileEntry = await FileEntry.FromStreamAsync(newFilename, bzip2Stream, fileEntry);

        if (Extractor.IsQuine(newFileEntry))
        {
            Logger.Info(Extractor.IS_QUINE_STRING, fileEntry.Name, fileEntry.FullPath);
            throw new OverflowException();
        }

        await foreach (var extractedFile in Context.ExtractAsync(newFileEntry, options, governor))
        {
            yield return(extractedFile);
        }
    }
    finally
    {
        bzip2Stream.Dispose();
    }
}
/// <summary>
/// Extracts the item that sits at the belt position served by the given inserter.
/// Only decrements this inventory, does not add to the inserter's inventory.
/// Returns the item if successful, else null.
/// </summary>
/// <param name="item">Not used by the current implementation.</param>
/// <param name="inserter">Inserter whose output side and grid placement select the belt side and position.</param>
/// <returns>The info of the removed item, or null when the position is empty.</returns>
public GenericItem ExtractItem(GenericItem item, Extractor inserter)
{
    //Side the inserter is using
    InsertSide insert_side = inserter.OUTPUT_SIDE;
    InsertDir insert_dir = GetInsertDir(inserter.GetComponent <GridController>());
    BeltSide side = GetBeltSide(insert_dir, insert_side);
    BeltPos pos = GetBeltPos(insert_dir, insert_side);

    //Get the relevant list.
    List <ItemController> LIST = GetSideList(side);
    int slot = (int)pos;

    // An empty slot used to throw a NullReferenceException although the contract above
    // promises null; honour the contract.
    ItemController occupant = LIST[slot];
    if (occupant == null)
    {
        return(null);
    }

    //Save the item to be returned
    GenericItem item_ret = occupant.INFO;

    //Update side list.
    occupant.DespawnObject();
    LIST[slot] = null;

    return(item_ret);
}
/// <summary>
/// Validates the course URL and the token, extracts the course while showing a
/// progress bar and then hands it to the downloader. Each failure is reported and the
/// flow restarts through <c>RunWithConfig</c> or <c>RunWithoutConfig</c>.
/// </summary>
/// <param name="config">Quality, authentication token and course directory to use.</param>
/// <param name="courseUrl">URL of the course to extract.</param>
private static async Task RunExtractorAndDownloader(Config config, string courseUrl)
{
    Console.WriteLine(TUI.CONTINUEGLYPH + "Extracting Course Data. This might take some time...");

    var extractor = new Extractor(courseUrl, config.Quality, config.AuthenticationToken);

    bool urlIsValid = extractor.HasValidUrl();
    if (!urlIsValid)
    {
        TUI.ShowError("The course url you provided is not a recognized valid Linkedin Learning link");
        await RunWithConfig(config);
        return;
    }

    bool tokenIsValid = await extractor.HasValidToken();
    if (!tokenIsValid)
    {
        TUI.ShowError("The token you provided is not valid");
        await RunWithoutConfig();
        return;
    }

    Course course;
    try
    {
        using var progressBar = new ProgressBar(10000, "Extracting Course Links - This will take some time", optionPbarExtractor);
        course = await extractor.GetCourse(progressBar.AsProgress <float>());
    }
    catch (Exception failure)
    {
        TUI.ShowError(failure.Message);
        Log.Error(failure, failure.Message);
        await RunWithoutConfig();
        return;
    }

    Console.WriteLine(TUI.ENDGLYPH + "Course Extracted Successfully");
    Log.Information("Course Extracted. Downloading...");
    Console.WriteLine();

    CourseDownloader.DownloadCourse(course, config.CourseDirectory);
}
/// <summary>
/// Extracts and parses the spec input, then compares type, text, timex and the
/// future/past resolutions of each result with the expectation.
/// </summary>
public void TestDateTimeParser()
{
    string skipReason;
    if (TestUtils.EvaluateSpec(TestSpec, out skipReason))
    {
        Assert.Inconclusive(skipReason, GetMessage(TestSpec));
    }

    if (Debugger.IsAttached && TestSpec.Debug)
    {
        Debugger.Break();
    }

    var reference = TestSpec.GetReferenceDateTime();
    var extracted = Extractor.Extract(TestSpec.Input);
    var actualResults = extracted.Select(o => DateTimeParser.Parse(o, reference)).ToArray();
    var expectedResults = TestSpec.CastResults <DateTimeParseResult>();

    Assert.AreEqual(expectedResults.Count(), actualResults.Count(), GetMessage(TestSpec));

    foreach (var pair in Enumerable.Zip(expectedResults, actualResults, Tuple.Create))
    {
        var expected = pair.Item1;
        var actual = pair.Item2;

        Assert.AreEqual(expected.Type, actual.Type, GetMessage(TestSpec));
        Assert.AreEqual(expected.Text, actual.Text, GetMessage(TestSpec));

        // The expected value comes from the spec as JSON; the actual one is already typed.
        var actualValue = actual.Value as DateTimeResolutionResult;
        var expectedValue = JsonConvert.DeserializeObject <DateTimeResolutionResult>(expected.Value.ToString());

        Assert.IsNotNull(actualValue, GetMessage(TestSpec));
        Assert.AreEqual(expectedValue.Timex, actualValue.Timex, GetMessage(TestSpec));
        CollectionAssert.AreEqual(expectedValue.FutureResolution, actualValue.FutureResolution, GetMessage(TestSpec));
        CollectionAssert.AreEqual(expectedValue.PastResolution, actualValue.PastResolution, GetMessage(TestSpec));
    }
}
/// <summary>
/// Asks for a folder, collects its *.mp3 and *.jpg files and transliterates their
/// names, reporting success or failure in a message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // A dialog that is not confirmed is reported as an error.
    if (folderBrowserDialog2.ShowDialog() != DialogResult.OK)
    {
        MessageBox.Show("Что то пошло не так", "Упс", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    try
    {
        var patterns = new[] { "*.mp3", "*.jpg" };
        IEnumerable <FileInfo> files = Extractor.GetFiles(folderBrowserDialog2.SelectedPath, patterns);
        Translit.TranslitFileNames(files);

        MessageBox.Show("Слава Богу, ошибок нет! Всё сработало", "", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch
    {
        MessageBox.Show("Ошибка в ядре", "Упс", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Reads or writes the bytes of the 8-byte value starting at <paramref name="offset"/>.
/// </summary>
/// <param name="offset">Start position within the 8 bytes.</param>
/// <value>
/// Getter: for offsets 1..7 the bytes from the offset to the end; for any other offset
/// the result of <c>GetBytes()</c>. Setter: with offset &lt;= 0 and at least 8 source
/// bytes, the first 8 source bytes replace the whole value; otherwise as many source
/// bytes as fit are copied at the offset, and offsets outside 0..7 are ignored.
/// </value>
public byte[] this[int offset]
{
    get
    {
        if (offset > 0 && offset < 8)
        {
            int l = (8 - offset);
            byte[] r = new byte[l];
            fixed(byte *pbyte = bytes)
            fixed(byte *rbyte = r)
            {
                Extractor.CopyBlock(rbyte, pbyte + offset, l);
            }

            return(r);
        }

        return(GetBytes());
    }

    set
    {
        int l = value.Length;
        if (offset > 0 || l < 8)
        {
            // Outside 0..7 there is no room to write; these offsets used to write
            // outside the 8-byte buffer.
            if (offset < 0 || offset >= 8)
            {
                return;
            }

            int count = 8 - offset;
            if (l < count)
            {
                count = l;
            }

            fixed(byte *pbyte = bytes)
            fixed(byte *rbyte = value)
            {
                // Pass the clamped count. It was computed but the unclamped source
                // length was passed instead, which could run past the buffer.
                // Assumes the last argument is the byte count - TODO confirm against CopyBlock.
                Extractor.CopyBlock(pbyte, rbyte, offset, count);
            }
        }
        else
        {
            // offset <= 0 and at least 8 source bytes: overwrite the whole value at once.
            fixed(byte *v = value)
            fixed(byte *b = bytes)
            {
                *(ulong *)b = *(ulong *)v;
            }
        }
    }
}
/// <summary>
/// Checks the interface and implementation generated for a struct with constructors:
/// the implementation is expected to expose both declared constructors and a
/// parameterless one. Comparison ignores whitespace.
/// </summary>
public void ExtractMembers_StructWithConstructors_ExtractsConstructorIncludingParameterless()
{
    const string interfaceName = "Custom.Namespace.IMyStruct";
    const string implementationName = "Custom.Namespace.Impl";
    IExtractor extractor = new Extractor(typeof(StructWithConstructors));

    var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
    var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
    write_output(generatedInterface, generatedImplementation);

    var expectedInterface = @" namespace Custom.Namespace { public interface IMyStruct { } }";
    Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

    var expectedImplementation = @" namespace Custom.Namespace { public class Impl : Custom.Namespace.IMyStruct { private readonly Core.Fixtures.Fakes.StructWithConstructors instance; public Impl(System.Int32 value) { instance = new Core.Fixtures.Fakes.StructWithConstructors(value); } public Impl(System.Int32 value1, System.Int32 value2) { instance = new Core.Fixtures.Fakes.StructWithConstructors(value1, value2); } public override System.Boolean Equals(System.Object obj) { return instance.Equals(obj); } public override System.Int32 GetHashCode() { return instance.GetHashCode(); } public override System.String ToString() { return instance.ToString(); } public Impl() { instance = new Core.Fixtures.Fakes.StructWithConstructors(); } } }";
    Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
}
/// <summary>
/// Generates the instance and static interface/implementation pairs for
/// <see cref="DateTime"/> and prints them to the console; nothing is asserted.
/// </summary>
public void ExtractMembers_TypeOfDateTime_JustWriteItToConsole()
{
    IExtractor extractor = new Extractor(typeof (DateTime));

    // Instance members
    var instanceInterfaceName = "Custom.Namespace.IDateTime";
    var instanceImplementationName = "Custom.Namespace.DateTimeImplementation";

    var instanceInterface = extractor.ExtractInterfaceForInstanceMembers(instanceInterfaceName);
    Console.WriteLine(instanceInterface);
    var instanceImplementation = extractor.ExtractImplementationForInstanceMembers(instanceInterfaceName, instanceImplementationName);
    Console.WriteLine(instanceImplementation);

    // Static members
    var staticInterfaceName = "Custom.Namespace.IDateTimeStatics";
    var staticImplementationName = "Custom.Namespace.DateTimeStaticsImplementation";

    var staticInterface = extractor.ExtractInterfaceForStaticMembers(staticInterfaceName);
    Console.WriteLine(staticInterface);
    var staticImplementation = extractor.ExtractImplementationForStaticMembers(staticInterfaceName, staticImplementationName);
    Console.WriteLine(staticImplementation);

    Console.WriteLine();
}
/// <summary>
/// Emits a link for an entity whose text is preceded by a symbol. The symbol and the
/// HTML-escaped text are each wrapped in their configured tag when one is set. The
/// symbol is placed inside the link unless it is an at-sign and
/// <c>UsernameIncludeSymbol</c> is false, in which case it is appended before the link.
/// </summary>
/// <param name="entity">Entity being linked.</param>
/// <param name="symbol">Leading symbol.</param>
/// <param name="text">Link text; escaped before use.</param>
/// <param name="attributes">Attributes passed on to <c>LinkToText</c>.</param>
/// <param name="builder">Output buffer.</param>
public void LinkToTextWithSymbol(Extractor.Entity entity, string symbol, string text, IDictionary<string, string> attributes, StringBuilder builder)
{
    string taggedSymbol;
    if (string.IsNullOrWhiteSpace(SymbolTag))
    {
        taggedSymbol = symbol;
    }
    else
    {
        taggedSymbol = string.Format("<{0}>{1}</{0}>", SymbolTag, symbol);
    }

    text = EscapeHTML(text);

    string taggedText;
    if (string.IsNullOrWhiteSpace(TextWithSymbolTag))
    {
        taggedText = text;
    }
    else
    {
        taggedText = string.Format("<{0}>{1}</{0}>", TextWithSymbolTag, text);
    }

    bool symbolInsideLink = UsernameIncludeSymbol || !Regex.AT_SIGNS.IsMatch(symbol);
    if (!symbolInsideLink)
    {
        builder.Append(taggedSymbol);
        LinkToText(entity, taggedText, attributes, builder);
        return;
    }

    LinkToText(entity, taggedSymbol.ToString() + taggedText, attributes, builder);
}
/// <summary>
/// Initializes a new instance of the <see cref="Autolink"/> class with the default
/// classes and URL bases, no URL class, and <c>NoFollow</c> enabled.
/// </summary>
/// <param name="linkURLWithoutProtocol">Whether or not to link urls without a protocol</param>
public Autolink(bool linkURLWithoutProtocol)
{
    // CSS classes
    UrlClass = null;
    ListClass = DEFAULT_LIST_CLASS;
    UsernameClass = DEFAULT_USERNAME_CLASS;
    HashtagClass = DEFAULT_HASHTAG_CLASS;
    CashtagClass = DEFAULT_CASHTAG_CLASS;

    // URL bases
    UsernameUrlBase = DEFAULT_USERNAME_URL_BASE;
    ListUrlBase = DEFAULT_LIST_URL_BASE;
    HashtagUrlBase = DEFAULT_HASHTAG_URL_BASE;
    CashtagUrlBase = DEFAULT_CASHTAG_URL_BASE;

    InvisibleTagAttrs = DEFAULT_INVISIBLE_TAG_ATTRS;
    NoFollow = true;

    // The extractor is configured from the constructor argument.
    var configuredExtractor = new Extractor();
    configuredExtractor.ExtractURLWithoutProtocol = linkURLWithoutProtocol;
    __Extractor = configuredExtractor;
}
/// <summary>
/// Checks that the number of URLs extracted from the test string equals the number of
/// expected tags.
/// </summary>
/// <param name="testCase">Test string, expected tags and description.</param>
public void ExtractsUrlsCorrectly(SimpleTwitterCase testCase)
{
    var extractor = new Extractor();

    var extractedUrls = extractor.ExtractURLs(testCase.TestString);

    Assert.AreEqual(testCase.ExpectedTags.Count(), extractedUrls.Count, testCase.Description);
}
/// <summary>
/// Extracts package metadata from a catalog item using a fresh extractor.
/// </summary>
/// <param name="catalogItem">Catalog item to read.</param>
/// <returns>The dictionary produced by the extractor.</returns>
public static IDictionary<string, string> MakePackageMetadata(JObject catalogItem)
{
    return new Extractor().Extract(catalogItem);
}
/// <summary>
/// Checks that the hashtags extracted from the test string match the expected tags in
/// number and that every extracted tag is among the expected ones.
/// </summary>
/// <param name="testCase">Test string, expected tags and description.</param>
public void ExtractsHashtagsCorrectly(SimpleTwitterCase testCase)
{
    var extractor = new Extractor();

    var extractedTags = extractor.ExtractHashtags(testCase.TestString);

    Assert.AreEqual(testCase.ExpectedTags.Count(), extractedTags.Count, testCase.Description);

    foreach (var extractedTag in extractedTags)
    {
        Assert.Contains(extractedTag, testCase.ExpectedTags, extractedTag + " Not Found");
    }
}
/// <summary>
/// Extracts package metadata from a package using a fresh extractor.
/// </summary>
/// <param name="package">Package to read.</param>
/// <returns>The dictionary produced by the extractor.</returns>
public static IDictionary<string, string> MakePackageMetadata(Package package)
{
    return new Extractor().Extract(package);
}
/// <summary>
/// Checks the interface and implementation generated for a class with methods
/// (multiple, out and ref parameters, and a simple return value). Comparison ignores
/// whitespace.
/// </summary>
public void ExtractInstanceMembers_ClassWithMethods()
{
    const string interfaceName = "Custom.Namespace.IMyClass";
    const string implementationName = "Custom.Namespace.SMyClass";
    IExtractor extractor = new Extractor(typeof(ClassWithMethods));

    var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
    var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
    write_output(generatedInterface, generatedImplementation);

    var expectedInterface = @" namespace Custom.Namespace { public interface IMyClass { void MultipleParameters(System.String value1, System.Int32 value2); void OutParameter(out System.String value); void RefParameter(ref System.String value); System.Int32 Simple(); } }";
    Assert.That(generatedInterface.RemoveWhitespace(), Is.EqualTo(expectedInterface.RemoveWhitespace()), "failed on interface creation");

    var expectedImplementation = @" namespace Custom.Namespace { public class SMyClass : Custom.Namespace.IMyClass { private readonly Core.Fixtures.Fakes.ClassWithMethods instance; public SMyClass() { instance = new Core.Fixtures.Fakes.ClassWithMethods(); } public void MultipleParameters(System.String value1, System.Int32 value2) { instance.MultipleParameters(value1, value2); } public void OutParameter(out System.String value) { instance.OutParameter(out value); } public void RefParameter(ref System.String value) { instance.RefParameter(ref value); } public System.Int32 Simple() { return instance.Simple(); } public override System.Boolean Equals(System.Object obj) { return instance.Equals(obj); } public override System.Int32 GetHashCode() { return instance.GetHashCode(); } public override System.String ToString() { return instance.ToString(); } } }";
    Assert.That(generatedImplementation.RemoveWhitespace(), Is.EqualTo(expectedImplementation.RemoveWhitespace()), "failed on implementation creation");
}
/// <summary>
/// Stores the result store and frontier, creates every pipeline component, and connects
/// the components to each other through their <c>SendTo</c> methods.
/// </summary>
/// <param name="documentFactory">Passed to the Parser.</param>
/// <param name="store">Kept in <c>_store</c> and passed to the Storer and the Provider.</param>
/// <param name="frontier">Kept in <c>_frontier</c> and passed to the Provider.</param>
protected Crawler(IDocumentFactory documentFactory, IKeyValueStore<string, Result> store, IKeyValueStore<string, FetchTarget> frontier)
{
    const string userAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36";

    _store = store;
    _frontier = frontier;

    // Option objects: only the fetcher receives a non-default setting.
    var fetcherOptions = new FetcherOptions();
    fetcherOptions.UserAgent = userAgent;
    var parserOptions = new ParserOptions();
    var scraperOptions = new ScraperOptions();
    var extractorOptions = new ExtractorOptions();
    var builderOptions = new BuilderOptions();
    var providerOptions = new ProviderOptions();

    // Components. Storer and Dispatcher take no options object.
    Fetcher = new Fetcher(fetcherOptions);
    Parser = new Parser(parserOptions, documentFactory);
    Scraper = new Scraper(scraperOptions);
    Extractor = new Extractor(extractorOptions);
    Storer = new Storer(store);
    Builder = new Builder(builderOptions);
    Provider = new Provider(providerOptions, store, frontier);
    Dispatcher = new Dispatcher();

    // Fetcher output with an OK status goes to both the Parser and the Builder.
    Fetcher.SendTo(Parser, response => response.StatusCode == System.Net.HttpStatusCode.OK);

    // Parser output fans out to the Scraper and the Extractor.
    Parser.SendTo(Scraper);
    Parser.SendTo(Extractor);

    Fetcher.SendTo(Builder, response => response.StatusCode == System.Net.HttpStatusCode.OK);

    // Scraper and Extractor results are collected by the Builder.
    Scraper.SendTo(Builder);
    Extractor.SendTo(Builder);

    // Builder output goes to the Storer and to the Provider.
    Builder.SendTo(Storer);
    Builder.SendTo(Provider);

    // Non-null Provider output goes to the Dispatcher, which feeds the Fetcher again.
    Provider.SendTo(Dispatcher, target => target != null);
    Dispatcher.SendTo(Fetcher);
}
/// <summary>
/// Builds the anchor attributes for a cashtag entity and delegates rendering to
/// <c>LinkToTextWithSymbol</c> with "$" as the symbol.
/// </summary>
/// <param name="entity">The cashtag entity; its <c>Value</c> is used as the cashtag text.</param>
/// <param name="text">Not used by this method.</param>
/// <param name="builder">Forwarded to <c>LinkToTextWithSymbol</c>.</param>
public void LinkToCashtag(Extractor.Entity entity, string text, StringBuilder builder)
{
    string symbolText = entity.Value;

    // Keys are added in the order href, title, class.
    IDictionary<string, string> attrs = new Dictionary<string, string>
    {
        { "href", CashtagUrlBase + symbolText },
        { "title", "$" + symbolText },
        { "class", CashtagClass }
    };

    LinkToTextWithSymbol(entity, "$", symbolText, attrs, builder);
}
/// <summary>
/// Extracts an interface and implementation for ClassThatHasAReturnTypeOfSelf and compares the
/// generated sources with the expected text, in which the self-typed return is expressed through
/// the generated interface. Comparison ignores whitespace.
/// </summary>
public void ExtractInstanceMembers_SourceType_SubstituteCreatedType()
{
    const string interfaceName = "Custom.Namespace.IClassThatHasAReturnTypeOfSelf";
    const string implementationName = "Custom.Namespace.ClassWithMethodsImplementation";

    // Expected sources are compared after RemoveWhitespace(), so their layout is irrelevant.
    const string expectedInterface = @" namespace Custom.Namespace { public interface IClassThatHasAReturnTypeOfSelf { Custom.Namespace.IClassThatHasAReturnTypeOfSelf ReturnSameType(); } }";
    const string expectedImplementation = @" namespace Custom.Namespace { public class ClassWithMethodsImplementation : Custom.Namespace.IClassThatHasAReturnTypeOfSelf { private readonly Core.Fixtures.Fakes.ClassThatHasAReturnTypeOfSelf instance; public ClassWithMethodsImplementation() { instance = new Core.Fixtures.Fakes.ClassThatHasAReturnTypeOfSelf(); } public ClassWithMethodsImplementation(Core.Fixtures.Fakes.ClassThatHasAReturnTypeOfSelf instance) { this.instance = instance; } public Custom.Namespace.IClassThatHasAReturnTypeOfSelf ReturnSameType() { return new ClassWithMethodsImplementation(instance); } public override System.Boolean Equals(System.Object obj) { return instance.Equals(obj); } public override System.Int32 GetHashCode() { return instance.GetHashCode(); } public override System.String ToString() { return instance.ToString(); } } }";

    IExtractor extractor = new Extractor(typeof(ClassThatHasAReturnTypeOfSelf));
    var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
    var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
    write_output(generatedInterface, generatedImplementation);

    Assert.That(
        generatedInterface.RemoveWhitespace(),
        Is.EqualTo(expectedInterface.RemoveWhitespace()),
        "failed on interface creation");
    Assert.That(
        generatedImplementation.RemoveWhitespace(),
        Is.EqualTo(expectedImplementation.RemoveWhitespace()),
        "failed on implementation creation");
}
/// <summary>
/// Placeholder for the hashtag index test: it extracts the hashtags from the case's input string
/// but performs no comparison yet, failing with "NotImplemented" as soon as the case has an expectation.
/// </summary>
/// <param name="testCase">Supplies the input string and the expectations.</param>
public void PassesTwitterIndexesForHashTags(IndexedTwitterTest testCase)
{
    // The extraction result is not inspected; the call still runs the extractor on the input.
    var hashtags = new Extractor().ExtractHashtags(testCase.TestString);

    foreach (var expectation in testCase.Expectations)
    {
        Assert.Fail("NotImplemented");
    }
}
/// <summary>
/// Extracts an interface and implementation for SimpleClass and compares the generated sources with
/// the expected text: an empty interface, and an implementation containing only the constructor and
/// the Equals/GetHashCode/ToString overrides. Comparison ignores whitespace.
/// </summary>
public void ExtractInstanceMembers_SimpleClassWithNoImplementation_OverrideMembersFromObject()
{
    const string interfaceName = "Custom.Namespace.IMyClass";
    const string implementationName = "Custom.Namespace.SMyClass";

    // Expected sources are compared after RemoveWhitespace(), so their layout is irrelevant.
    const string expectedInterface = @" namespace Custom.Namespace { public interface IMyClass { } }";
    const string expectedImplementation = @" namespace Custom.Namespace { public class SMyClass : Custom.Namespace.IMyClass { private readonly Core.Fixtures.Fakes.SimpleClass instance; public SMyClass() { instance = new Core.Fixtures.Fakes.SimpleClass(); } public override System.Boolean Equals(System.Object obj) { return instance.Equals(obj); } public override System.Int32 GetHashCode() { return instance.GetHashCode(); } public override System.String ToString() { return instance.ToString(); } } }";

    IExtractor extractor = new Extractor(typeof(SimpleClass));
    var generatedInterface = extractor.ExtractInterfaceForInstanceMembers(interfaceName);
    var generatedImplementation = extractor.ExtractImplementationForInstanceMembers(interfaceName, implementationName);
    write_output(generatedInterface, generatedImplementation);

    Assert.That(
        generatedInterface.RemoveWhitespace(),
        Is.EqualTo(expectedInterface.RemoveWhitespace()),
        "failed on interface creation");
    Assert.That(
        generatedImplementation.RemoveWhitespace(),
        Is.EqualTo(expectedImplementation.RemoveWhitespace()),
        "failed on implementation creation");
}
/// <summary>
/// Extracts an interface and implementation for the static members of StaticClass and compares the
/// generated sources with the expected text, which contains only the static DoSomething wrapper and
/// no instance field or constructor. Comparison ignores whitespace.
/// </summary>
public void ExtractStaticMembers_StaticClass_DoNotExtractAnyInstanceMembers()
{
    const string interfaceName = "Custom.Namespace.IStaticMyClass";
    const string implementationName = "Custom.Namespace.SStaticMyClass";

    // Expected sources are compared after RemoveWhitespace(), so their layout is irrelevant.
    const string expectedInterface = @" namespace Custom.Namespace { public interface IStaticMyClass { void DoSomething(); } }";
    const string expectedImplementation = @" namespace Custom.Namespace { public class SStaticMyClass : Custom.Namespace.IStaticMyClass { public void DoSomething() { Core.Fixtures.Fakes.StaticClass.DoSomething(); } } }";

    IExtractor extractor = new Extractor(typeof(StaticClass));
    var generatedInterface = extractor.ExtractInterfaceForStaticMembers(interfaceName);
    var generatedImplementation = extractor.ExtractImplementationForStaticMembers(interfaceName, implementationName);
    write_output(generatedInterface, generatedImplementation);

    Assert.That(
        generatedInterface.RemoveWhitespace(),
        Is.EqualTo(expectedInterface.RemoveWhitespace()),
        "failed on interface creation");
    Assert.That(
        generatedImplementation.RemoveWhitespace(),
        Is.EqualTo(expectedImplementation.RemoveWhitespace()),
        "failed on implementation creation");
}
/// <summary>
/// Runs a newly created Extractor against this instance with the supplied arguments.
/// </summary>
/// <param name="args">Arguments forwarded unchanged to <c>Extractor.Extract</c>.</param>
/// <returns>The value returned by <c>Extractor.Extract</c>.</returns>
public bool Extract(StringDict args)
{
    return new Extractor().Extract(this, args);
}
/// <summary>
/// Appends an <c>&lt;a&gt;</c> element to <paramref name="builder"/>, using the given attributes and link text
/// after the optional nofollow setting and the optional attribute/text modifiers have been applied.
/// </summary>
/// <param name="entity">The entity being linked; passed to the modifiers.</param>
/// <param name="text">Link body. It is appended as-is (not passed through <c>EscapeHTML</c>).</param>
/// <param name="attributes">Anchor attributes. This dictionary is modified in place when <c>NoFollow</c> is set or a modifier changes it.</param>
/// <param name="builder">Receives the generated markup.</param>
public void LinkToText(Extractor.Entity entity, string text, IDictionary<string, string> attributes, StringBuilder builder)
{
    if (NoFollow)
    {
        attributes["rel"] = "nofollow";
    }

    if (LinkAttributeModifier != null)
    {
        LinkAttributeModifier.Modify(entity, attributes);
    }

    if (LinkTextModifier != null)
    {
        text = LinkTextModifier.Modify(entity, text);
    }

    // Opening tag: attribute names and values are both escaped.
    builder.Append("<a");
    foreach (var attribute in attributes)
    {
        builder.Append(" ");
        builder.Append(EscapeHTML(attribute.Key));
        builder.Append("=\"");
        builder.Append(EscapeHTML(attribute.Value));
        builder.Append("\"");
    }
    builder.Append(">");

    // Body and closing tag.
    builder.Append(text);
    builder.Append("</a>");
}
/// <summary>
/// Runs every case of the "json" test section: extracts entities from the case text, assigns fixed
/// display/expanded URLs to URL entities, auto-links the text and compares with the expected output.
/// All failing cases are collected and reported together in a single failure at the end.
/// </summary>
public void AutolinkJsonTest()
{
    var extractor = new Extractor();
    var failureMessages = new List<string>();

    foreach (dynamic testCase in LoadTestSection<string>("json"))
    {
        string actual = string.Empty;
        try
        {
            List<Extractor.Entity> entities = extractor.ExtractEntitiesWithIndices(testCase.text);
            foreach (Extractor.Entity candidate in entities)
            {
                if (candidate.Type != Extractor.EntityType.Url)
                {
                    continue;
                }

                candidate.DisplayURL = "twitter.com";
                candidate.ExpandedURL = "http://twitter.com/";
            }

            actual = autolink.AutoLinkEntities(testCase.text, entities);
            Assert.AreEqual(testCase.expected, actual);
        }
        catch (Exception)
        {
            // Any exception (assertion failure or otherwise) is recorded so the remaining cases still run.
            failureMessages.Add(string.Format("\n{0}: {1}\n\tExpected: {2}\n\t Actual: {3}", testCase.description, testCase.text, testCase.expected, actual));
        }
    }

    if (failureMessages.Count > 0)
    {
        Assert.Fail(string.Join("\n", failureMessages));
    }
}
/// <summary>
/// Checks the reply screen name extracted from the case's input string against the first expected tag.
/// When nothing is extracted, the first expected tag must be empty.
/// </summary>
/// <param name="testCase">Supplies the input string, the expected tags and the failure description.</param>
public void ExtractsRepliesCorrectly(SimpleTwitterCase testCase)
{
    var screenName = new Extractor().ExtractReplyScreenname(testCase.TestString);

    if (screenName != null)
    {
        Assert.AreEqual(testCase.ExpectedTags.First(), screenName, testCase.Description);
        return;
    }

    Assert.IsEmpty(testCase.ExpectedTags.First(), testCase.Description);
}
/// <summary>
/// Drives the fetch, link-extraction, filter/rank, classification and storage steps over four
/// hard-coded URLs plus the seed list of the current task. Timestamps are taken between the steps;
/// the resulting time spans are computed but not used further inside this method.
/// </summary>
public void Test2()
{
    var urls = new List<string>
    {
        "http://www.autonews.com/",
        "http://www.geonius.com/www/",
        "http://en.wikipedia.org/wiki/Main_Page",
        "http://www.computerworld.com/"
    };

    List<string> seeds = StorageSystem.StorageSystem.getInstance().getSeedList(taskId);
    foreach (string seed in seeds)
    {
        urls.Add(seed);
    }

    List<Category> _categories = StorageSystem.StorageSystem.getInstance().getCategories(taskId);
    Constraints _constraints = StorageSystem.StorageSystem.getInstance().getRestrictions(taskId);

    // The seed list is requested a second time and the result is discarded.
    StorageSystem.StorageSystem.getInstance().getSeedList(taskId);

    var filter = new Filter("http://", _constraints);
    var categorizer = new Categorizer(_categories);
    var ranker = new Ranker(categorizer);
    var extractor = new Extractor();
    var httpfetcher = new HttpResourceFetcher();

    foreach (string url in urls)
    {
        DateTime startTime = DateTime.Now;

        ResourceContent resource = null;
        if (httpfetcher.canFetch(url))
        {
            // NOTE(review): meaning of the 10000 and 100 arguments is not visible here.
            resource = httpfetcher.fetch(url, 10000, 100);
        }

        DateTime fetchEndTime = DateTime.Now;

        // Nothing to process when the fetch produced no resource or no content.
        if ((resource == null) || (resource.getResourceContent() == null))
        {
            continue;
        }

        // 0. Fetching the link from the internet.
        TimeSpan fetchingTime = fetchEndTime - startTime;

        // Extract all the links in the page.
        List<LinkItem> listOfLinks = extractor.extractLinks(resource.getResourceUrl(), resource.getResourceContent());
        RuntimeStatistics.addToExtractedUrls(listOfLinks.Count);

        DateTime extEndTime = DateTime.Now;

        // 1. Extracting the links from the request.
        TimeSpan extRequest = extEndTime - fetchEndTime;

        // Reset the dictionary in the filter that contains the urls from the same page.
        filter.resetDictionary();
        int filteredUrlsCount = 0;

        foreach (LinkItem item in listOfLinks)
        {
            // Each link is filtered on its own, as a one-element list.
            var candidateLinks = new List<String> { item.getLink() };
            List<String> filteredLinks = filter.filterLinks(candidateLinks);

            if (filteredLinks.Count == 0)
            {
                continue;
            }

            filteredUrlsCount++;
            Url frontierUrl = new Url(
                filteredLinks[0],
                hashUrl(filteredLinks[0]),
                ranker.rankUrl(resource, item),
                item.getDomainUrl(),
                hashUrl(item.getDomainUrl()));
            deployLinksToFrontier(frontierUrl);
            RuntimeStatistics.addToFeedUrls(1);
        }

        DateTime catStartTime = DateTime.Now;

        // 2. Ranking and deployment to the frontier.
        TimeSpan rankTotalRequest = catStartTime - extEndTime;

        // Ascribe the url to all the categories it belongs to.
        List<Result> classifiedResults = categorizer.classifyContent(resource.getResourceContent(), resource.getResourceUrl());
        if (classifiedResults.Count != 0)
        {
            RuntimeStatistics.addToCrawledUrls(1);
        }

        DateTime catEndTime = DateTime.Now;

        // 3. Classification of the current request.
        TimeSpan catTotalRequest = catEndTime - catStartTime;

        foreach (Result classifiedResult in classifiedResults)
        {
            Result storedResult = new Result(
                "0",
                classifiedResult.getUrl(),
                classifiedResult.getCategoryID(),
                resource.getRankOfUrl(),
                classifiedResult.getTrustMeter());
            deployResourceToStorage(storedResult);
        }

        DateTime endTime = DateTime.Now;

        // 4. Deployment to the database (result).
        TimeSpan deployRequest = endTime - catEndTime;

        // 5. Total processing time.
        TimeSpan totalRequest = endTime - startTime;
    }
}
/// <summary>
/// Builds the anchor attributes for a mention or list entity and delegates rendering to
/// <c>LinkToTextWithSymbol</c>. When the entity has a list slug, the slug is appended to the
/// mention and the list class/URL base are used; otherwise the username class/URL base are used.
/// </summary>
/// <param name="entity">The mention/list entity; <c>Value</c>, <c>Start</c> and <c>ListSlug</c> are read.</param>
/// <param name="text">The full source text; the character at <c>entity.Start</c> is taken as the at-sign.</param>
/// <param name="builder">Forwarded to <c>LinkToTextWithSymbol</c>.</param>
public void LinkToMentionAndList(Extractor.Entity entity, string text, StringBuilder builder)
{
    string target = entity.Value;

    // Take the at char from the original text, since it may be a full-width character.
    string atChar = text.Substring(entity.Start, 1);

    bool isList = entity.ListSlug != null;
    if (isList)
    {
        target += entity.ListSlug;
    }

    // Keys are added in the order class, href.
    IDictionary<string, string> attrs = new Dictionary<string, string>
    {
        { "class", isList ? ListClass : UsernameClass },
        { "href", (isList ? ListUrlBase : UsernameUrlBase) + target }
    };

    LinkToTextWithSymbol(entity, atChar, target, attrs, builder);
}
/// <summary>
/// Extracts an interface and implementation for the static members of ClassWithStatics (one method
/// and get-only, get/set and set-only properties) and compares the generated sources with the
/// expected text, ignoring whitespace.
/// </summary>
public void ExtractStaticMembers_ClassWithStatics_ExtractsStatics()
{
    const string interfaceName = "Custom.Namespace.IStaticMyClass";
    const string implementationName = "Custom.Namespace.SStaticMyClass";

    // Expected sources are compared after RemoveWhitespace(), so their layout is irrelevant.
    const string expectedInterface = @" namespace Custom.Namespace { public interface IStaticMyClass { System.Int32 DoSomething(); System.Int32 GetProperty { get; } System.Int32 GetSetProperty { get; set; } System.Int32 SetProperty { set; } } }";
    const string expectedImplementation = @" namespace Custom.Namespace { public class SStaticMyClass : Custom.Namespace.IStaticMyClass { private readonly Core.Fixtures.Fakes.ClassWithStatics instance; public System.Int32 DoSomething() { return Core.Fixtures.Fakes.ClassWithStatics.DoSomething(); } public System.Int32 GetProperty { get { return Core.Fixtures.Fakes.ClassWithStatics.GetProperty; } } public System.Int32 GetSetProperty { get { return Core.Fixtures.Fakes.ClassWithStatics.GetSetProperty; } set { Core.Fixtures.Fakes.ClassWithStatics.GetSetProperty = value; } } public System.Int32 SetProperty { set { Core.Fixtures.Fakes.ClassWithStatics.SetProperty = value; } } } }";

    IExtractor extractor = new Extractor(typeof(ClassWithStatics));
    var generatedInterface = extractor.ExtractInterfaceForStaticMembers(interfaceName);
    var generatedImplementation = extractor.ExtractImplementationForStaticMembers(interfaceName, implementationName);
    write_output(generatedInterface, generatedImplementation);

    Assert.That(
        generatedInterface.RemoveWhitespace(),
        Is.EqualTo(expectedInterface.RemoveWhitespace()),
        "failed on interface creation");
    Assert.That(
        generatedImplementation.RemoveWhitespace(),
        Is.EqualTo(expectedImplementation.RemoveWhitespace()),
        "failed on implementation creation");
}
/// <summary>
/// Builds the link text and anchor attributes for a URL entity and delegates rendering to
/// <c>LinkToText</c>. When the entity carries both a display URL and an expanded URL, the link
/// text is built from those; otherwise the escaped entity value is used.
/// </summary>
/// <param name="entity">The URL entity; <c>Value</c>, <c>DisplayURL</c> and <c>ExpandedURL</c> are read.</param>
/// <param name="text">Not used by this method.</param>
/// <param name="builder">Forwarded to <c>LinkToText</c>.</param>
public void LinkToURL(Extractor.Entity entity, string text, StringBuilder builder)
{
    string url = entity.Value;
    string linkText = EscapeHTML(url);

    if (entity.DisplayURL != null && entity.ExpandedURL != null)
    {
        // Goal: if a user copies and pastes a tweet containing a t.co'ed link, the paste should
        // contain the full original URL (expanded_url), not the display URL.
        //
        // Method: whenever possible, emit HTML that contains expanded_url and hide the parts that
        // are not part of display_url with the invisible-span attributes (InvisibleTagAttrs).
        // Per the original author's notes, font-size:0 content is still copied whereas
        // display:none content is not.
        //
        // The ellipses should be displayed but not copied, so they are wrapped in a
        // 'tco-ellipsis' span; an onCopy handler elsewhere is expected to hide everything with
        // that class. In case that handler does not fire, an invisible span holding a separator
        // character is placed inside the same 'tco-ellipsis' span so the ellipsis does not bump
        // up against the URL in the copied text.
        //
        // Emitted structure:
        //   <span class='tco-ellipsis'>            displayed, not copied
        //     [preceding ellipsis]<span invisible> </span>
        //   </span>
        //   <span invisible>[expanded_url before display_url]</span>   copied, not displayed
        //   <span class='js-display-url'>[display_url]</span>          displayed and copied
        //   <span invisible>[expanded_url after display_url]</span>    copied, not displayed
        //   <span class='tco-ellipsis'>            displayed, not copied
        //     <span invisible> </span>[following ellipsis]
        //   </span>
        //
        // Exception: when display_url is not a substring of expanded_url (the original notes cite
        // pic.twitter.com images), nothing special is done and display_url is used as the text.

        string displayURLSansEllipses = entity.DisplayURL.Replace("…", "");

        // Ordinal search: the Substring arithmetic below relies on the match being exactly
        // displayURLSansEllipses.Length UTF-16 units long at the returned index, which a
        // culture-sensitive IndexOf does not guarantee.
        int displayURLIndexInExpandedURL = entity.ExpandedURL.IndexOf(displayURLSansEllipses, System.StringComparison.Ordinal);

        if (displayURLIndexInExpandedURL != -1)
        {
            string beforeDisplayURL = entity.ExpandedURL.Substring(0, displayURLIndexInExpandedURL);
            string afterDisplayURL = entity.ExpandedURL.Substring(displayURLIndexInExpandedURL + displayURLSansEllipses.Length);

            // Ordinal prefix/suffix checks for the same reason as the search above.
            string precedingEllipsis = entity.DisplayURL.StartsWith("…", System.StringComparison.Ordinal) ? "…" : "";
            string followingEllipsis = entity.DisplayURL.EndsWith("…", System.StringComparison.Ordinal) ? "…" : "";
            string invisibleSpan = "<span " + InvisibleTagAttrs + ">";

            StringBuilder sb = new StringBuilder("<span class='tco-ellipsis'>");
            sb.Append(precedingEllipsis);
            sb.Append(invisibleSpan).Append(" </span></span>");
            sb.Append(invisibleSpan).Append(EscapeHTML(beforeDisplayURL)).Append("</span>");
            sb.Append("<span class='js-display-url'>").Append(EscapeHTML(displayURLSansEllipses)).Append("</span>");
            sb.Append(invisibleSpan).Append(EscapeHTML(afterDisplayURL)).Append("</span>");
            sb.Append("<span class='tco-ellipsis'>").Append(invisibleSpan).Append(" </span>").Append(followingEllipsis).Append("</span>");
            linkText = sb.ToString();
        }
        else
        {
            // NOTE(review): unlike the other paths, DisplayURL is used here without EscapeHTML,
            // and LinkToText appends link text verbatim — confirm DisplayURL is trusted input.
            linkText = entity.DisplayURL;
        }
    }

    // Attribute keys are added in the order href, title, class, target.
    IDictionary<string, string> attrs = new Dictionary<string, string>();
    attrs["href"] = url;
    if (!string.IsNullOrWhiteSpace(entity.DisplayURL) && !string.IsNullOrWhiteSpace(entity.ExpandedURL))
    {
        attrs["title"] = entity.ExpandedURL;
    }
    if (!string.IsNullOrWhiteSpace(UrlClass))
    {
        attrs["class"] = UrlClass;
    }
    if (!string.IsNullOrWhiteSpace(UrlTarget))
    {
        attrs["target"] = UrlTarget;
    }

    LinkToText(entity, linkText, attrs, builder);
}
/// <summary>
/// Scans each word group (skipping the first and last element of every group) and collects
/// distinct phrases built from consecutive words, driven by the part-of-speech sign sets of
/// <paramref name="extractor"/>.
/// </summary>
/// <param name="words">Word groups to scan; each word's <c>nPOS</c> and <c>sWord</c> are read.</param>
/// <param name="extractor">Supplies <c>Signs</c>, <c>FrontAllowSigns</c>, <c>NextContinueSigns</c> and <c>NextBanSigns</c>.</param>
/// <returns>The distinct collected phrases.</returns>
private string[] GetValuesByRules(List<WordResult[]> words, Extractor extractor)
{
    HashSet<string> phrases = new HashSet<string>();
    string pending = string.Empty;      // phrase currently being assembled
    bool keepGoing = false;             // set while a phrase continues across words
    string previousType = "no-type";    // POS string of the last word that ended the main path

    foreach (WordResult[] group in words)
    {
        // Index 0 and the last index of each group are never visited.
        for (int pos = 1; pos < group.Length - 1; pos++)
        {
            string currentType = Utility.GetPOSString(group[pos].nPOS).Trim();
            bool startsPhrase = extractor.Signs.Contains(currentType) && extractor.FrontAllowSigns.Contains(previousType);

            if (startsPhrase || keepGoing)
            {
                // Look-ahead only happens when the next word is also inside the visited range.
                if (pos < group.Length - 2)
                {
                    // NOTE(review): unlike currentType, the look-ahead POS string is not trimmed.
                    string followingType = Utility.GetPOSString(group[pos + 1].nPOS);

                    if (extractor.NextContinueSigns.Contains(followingType))
                    {
                        // Remember the type of the first word of a continued phrase only.
                        if (!keepGoing)
                        {
                            previousType = currentType;
                        }

                        pending += group[pos].sWord;
                        keepGoing = true;
                        continue;
                    }

                    if (extractor.NextBanSigns.Contains(followingType))
                    {
                        // Banned follower: the current word is not appended; flush what exists.
                        if (pending != string.Empty && !phrases.Contains(pending))
                        {
                            phrases.Add(pending);
                            pending = string.Empty;
                            keepGoing = false;
                        }

                        continue;
                    }

                    pending += group[pos].sWord;
                    if (pending != string.Empty && !phrases.Contains(pending))
                    {
                        phrases.Add(pending);
                        pending = string.Empty;
                        keepGoing = false;
                        continue;
                    }

                    // NOTE(review): when the phrase was already collected, control falls through
                    // and the same word is appended a second time below — confirm this is intended.
                }

                keepGoing = false;
                pending += group[pos].sWord;
                if (pending != string.Empty && !phrases.Contains(pending))
                {
                    phrases.Add(pending);
                    pending = string.Empty;
                    continue;
                }
            }

            // Flush anything still pending, then record this word's type for the next iteration.
            if (pending != string.Empty && !phrases.Contains(pending))
            {
                phrases.Add(pending);
                pending = string.Empty;
            }

            keepGoing = false;
            previousType = currentType;
        }
    }

    return phrases.ToArray();
}
/// <summary>
/// Builds the anchor attributes for a hashtag entity and delegates rendering to
/// <c>LinkToTextWithSymbol</c>. The class attribute gets an extra " rtl" suffix when
/// <c>Regex.RTL_CHARACTERS</c> matches the source text.
/// </summary>
/// <param name="entity">The hashtag entity; <c>Value</c> and <c>Start</c> are read.</param>
/// <param name="text">The full source text; the character at <c>entity.Start</c> is taken as the hash sign.</param>
/// <param name="builder">Forwarded to <c>LinkToTextWithSymbol</c>.</param>
public void LinkToHashtag(Extractor.Entity entity, string text, StringBuilder builder)
{
    // Take the hash char from the original text, since it may be a full-width character.
    string hashChar = text.Substring(entity.Start, 1);
    string tag = entity.Value;

    // Keys are added in the order href, title, class.
    IDictionary<string, string> attrs = new Dictionary<string, string>
    {
        { "href", HashtagUrlBase + tag },
        { "title", "#" + tag },
        { "class", Regex.RTL_CHARACTERS.IsMatch(text) ? HashtagClass + " rtl" : HashtagClass }
    };

    LinkToTextWithSymbol(entity, hashChar, tag, attrs, builder);
}