コード例 #1
0
        /// <summary>
        /// Loads the stream as an XML document and builds an <see cref="ExtractionResult"/> from it.
        /// </summary>
        /// <param name="details">Request details; supplies the text encoding and is passed to the result.</param>
        /// <param name="stream">Source stream containing the XML. It is left open when this method returns.</param>
        /// <returns>The extraction result after <c>AnnotateSegments</c> has been called on it.</returns>
        public ExtractionResult ExtractText(IndexingRequestDetails details, Stream stream)
        {
            // leaveOpen: true - the stream is not closed when the reader is disposed.
            using (var xmlReader = new StreamReader(stream, details.Encoding, detectEncodingFromByteOrderMarks: false, bufferSize: 16, leaveOpen: true))
            {
                var xml = XDocument.Load(xmlReader);
                var result = new ExtractionResult(details);

                if (Extensions.Count != 0)
                {
                    // Offer the document to each extension in turn; stop at the first
                    // one whose TryProcess returns true.
                    foreach (var extension in Extensions)
                    {
                        var handled = extension.TryProcess(result, xml);
                        if (handled)
                        {
                            break;
                        }
                    }
                }
                else
                {
                    // No extensions registered: segment the document with the simple XElement degrapher.
                    result.GenerateSegments(xml, SimpleDegrapher.XElementDegrapher);
                }

                result.AnnotateSegments();
                return result;
            }
        }
コード例 #2
0
        /// <summary>
        /// Reads the whole stream as text and builds an <see cref="ExtractionResult"/>
        /// from its non-empty lines.
        /// </summary>
        /// <param name="details">Request details; supplies the text encoding and is passed to the result.</param>
        /// <param name="stream">Source stream containing the text. It is left open when this method returns.</param>
        /// <returns>The extraction result after <c>AnnotateSegments</c> has been called on it.</returns>
        public ExtractionResult ExtractText(IndexingRequestDetails details, Stream stream)
        {
            // leaveOpen: true - the stream is not closed when the reader is disposed.
            using (var textReader = new StreamReader(stream, details.Encoding, detectEncodingFromByteOrderMarks: false, bufferSize: 16, leaveOpen: true))
            {
                var text = textReader.ReadToEnd();

                // Break on CR and LF individually and drop the empty pieces that produces
                // (blank lines, and the gap inside every CRLF pair).
                var nonEmptyLines = (from piece in text.Split('\r', '\n')
                                     where !string.IsNullOrEmpty(piece)
                                     select piece).ToArray();

                var result = new ExtractionResult(details);
                result.GenerateSegments(nonEmptyLines, null);
                result.AnnotateSegments();
                return result;
            }
        }
コード例 #3
0
        /// <summary>
        /// Reads the whole stream as text, deserializes it as JSON and builds an
        /// <see cref="ExtractionResult"/> from the deserialized object.
        /// </summary>
        /// <param name="details">Request details; supplies the text encoding and is passed to the result.</param>
        /// <param name="stream">Source stream containing the JSON. It is left open when this method returns.</param>
        /// <returns>The extraction result after <c>AnnotateSegments</c> has been called on it.</returns>
        public ExtractionResult ExtractText(IndexingRequestDetails details, Stream stream)
        {
            // leaveOpen: true - the stream is not closed when the reader is disposed.
            using (var jsonReader = new StreamReader(stream, details.Encoding, detectEncodingFromByteOrderMarks: false, bufferSize: 16, leaveOpen: true))
            {
                var json = jsonReader.ReadToEnd();
                var graph = JsonConvert.DeserializeObject(json);

                var result = new ExtractionResult(details);
                result.GenerateSegments(graph, JsonDegrapher);
                result.AnnotateSegments();
                return result;
            }
        }
コード例 #4
0
        /// <summary>
        /// Reads the stream line by line as tab-separated text and builds an
        /// <see cref="ExtractionResult"/> with one entry per line.
        /// </summary>
        /// <remarks>
        /// Each line is split on tab characters, the fields are put in reverse order,
        /// and they are joined with "\r\n" to form the entry for that line.
        /// </remarks>
        /// <param name="details">Request details; supplies the text encoding and is passed to the result.</param>
        /// <param name="stream">Source stream containing the text. It is left open when this method returns.</param>
        /// <returns>The extraction result after <c>AnnotateSegments</c> has been called on it.</returns>
        public ExtractionResult ExtractText(IndexingRequestDetails details, Stream stream)
        {
            // leaveOpen: true - the stream is not closed when the reader is disposed.
            using (var lineReader = new StreamReader(stream, details.Encoding, detectEncodingFromByteOrderMarks: false, bufferSize: 16, leaveOpen: true))
            {
                var entries = new List<string>();

                // ReadLine returns null once the end of the stream is reached.
                for (var line = lineReader.ReadLine(); line != null; line = lineReader.ReadLine())
                {
                    var fields = line.Split('\t');
                    // AsEnumerable() keeps this bound to the LINQ Reverse, which yields a new sequence.
                    var lastFieldFirst = fields.AsEnumerable().Reverse();
                    entries.Add(string.Join("\r\n", lastFieldFirst));
                }

                var result = new ExtractionResult(details);
                result.GenerateSegments(entries, null);
                result.AnnotateSegments();
                return result;
            }
        }
コード例 #5
0
        /// <summary>
        /// Reads the whole stream as text, parses it as HTML and builds an
        /// <see cref="ExtractionResult"/> from the parsed document.
        /// </summary>
        /// <remarks>
        /// When <c>FullDocumentCapture</c> is set, the document's <c>TextContent</c> is
        /// segmented; otherwise the parsed document itself is segmented.
        /// </remarks>
        /// <param name="details">Request details; supplies the text encoding and is passed to the result.</param>
        /// <param name="stream">Source stream containing the HTML. It is left open when this method returns.</param>
        /// <returns>The extraction result after <c>AnnotateSegments</c> has been called on it.</returns>
        public ExtractionResult ExtractText(IndexingRequestDetails details, Stream stream)
        {
            // leaveOpen: true - the stream is not closed when the reader is disposed.
            using (var htmlReader = new StreamReader(stream, details.Encoding, detectEncodingFromByteOrderMarks: false, bufferSize: 16, leaveOpen: true))
            {
                var parser = new HtmlParser(Configuration.Default.WithDefaultLoader());
                var dom = parser.Parse(htmlReader.ReadToEnd());

                var result = new ExtractionResult(details);

                // A degrapher is used here because AngleSharp uses recursion.
                if (!FullDocumentCapture)
                {
                    result.GenerateSegments(dom, HtmlDegrapher);
                }
                else
                {
                    result.GenerateSegments(dom.TextContent, HtmlDegrapher);
                }

                result.AnnotateSegments();
                return result;
            }
        }