C# (CSharp) IDocument.AddSpan Examples

Programming Language: C# (CSharp)

Class/Type: IDocument

Method/Function: AddSpan

Examples at hotexamples.com: 3

C# (CSharp) IDocument.AddSpan - 3 examples found. These are the top-rated real-world C# (CSharp) examples of IDocument.AddSpan, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Clone(30)

As(10)

Add(8)

BeginRead(8)

AppendContentStream(8)

BeginWrite(7)

CheckOut(6)

AddDocument(6)

TryGetSemanticModel(6)

AddPage(5)

CheckIn(5)

Activate(4)

AddElement(4)

CanRedo(3)

appendChild(3)

GetTextVersion(3)

CanUndo(3)

Adopt(3)

AddSpan(3)

AddPages(3)

Cast(2)

CancelCheckOut(2)

GetAllText(2)

BlankDocument(2)

GetPage(2)

AwaitEventAsync(2)

AssertThatIfContentHashExistsItIsEqualTo(2)

Clear(2)

getDocumentElement(2)

AddElementToArray(2)

AsDynamic(1)

AddObjectFieldValue(1)

getBaseURI(1)

AddColumn(1)

ToDocType(1)

OpenDocument(1)

NewRootForNukeMethodAndAnySideEffectsInArguments(1)

AddFont(1)

AddImage(1)

GetDynamicMenu(1)

AddLastSectionProperties(1)

AddNumberingInstance(1)

AddNumberingStyle(1)

Checkin(1)

AddSectionProperties(1)

ApplyHighlight(1)

ChangeDocumentImages(1)

AfterSave(1)

Allow(1)

AppendChild(1)

Example #1

Show file

File: SpaceTokenizer.cs Project: yasmineChelly-95/catalyst

 /// <summary>
 /// Makes sure the document has at least one span, then hands every span to the span-level <c>Parse</c> overload.
 /// </summary>
 /// <param name="document">
 /// Document to process. When it has no spans, one is created with <c>AddSpan(0, document.Length - 1)</c> first.
 /// </param>
 public void Parse(IDocument document)
 {
     bool alreadySegmented = document.Spans.Any();
     if (!alreadySegmented)
     {
         int lastIndex = document.Length - 1;
         document.AddSpan(0, lastIndex);
     }

     foreach (ISpan span in document.Spans)
     {
         Parse(span);
     }
 }

Example #2

Show file

        /// <summary>
        /// Makes sure the document has at least one span, then hands every span to the span-level <c>Parse</c> overload.
        /// </summary>
        /// <remarks>
        /// If parsing a span throws <see cref="InvalidOperationException"/>, the error is logged together with the
        /// document text, the document is cleared, and processing of this document stops.
        /// </remarks>
        /// <param name="document">
        /// Document to process. When it has no spans, one is created with <c>AddSpan(0, document.Length - 1)</c> first.
        /// </param>
        public void Parse(IDocument document)
        {
            if (!document.Spans.Any())
            {
                document.AddSpan(0, document.Length - 1);
            }

            foreach (ISpan s in document.Spans)
            {
                try
                {
                    Parse(s);
                }
                catch (InvalidOperationException ex)
                {
                    Logger.LogError(ex, "Error tokenizing document:\n'{TEXT}'", document.Value);
                    document.Clear();

                    //The document was just cleared while its spans are being enumerated: stop here instead of
                    //advancing an enumerator over a collection that has been modified underneath it.
                    break;
                }
            }
        }

Example #3

Show file

File: SentenceDetector.cs Project: yasmineChelly-95/catalyst

        /// <summary>
        /// Splits a document that holds exactly one tokenized span into one span per detected sentence.
        /// </summary>
        /// <remarks>
        /// Nothing is changed when the document is empty, does not hold exactly one span, or that span has no tokens.
        /// Otherwise the document is cleared and rebuilt: a span is added for each predicted sentence boundary (plus one
        /// for any remaining text), or a single span over the whole text when no boundary was predicted. The original
        /// tokens are re-added to whichever new span contains them.
        /// </remarks>
        /// <param name="document">Document to split; its spans are replaced in place.</param>
        public void Parse(IDocument document)
        {
            if (document.Length == 0)
            {
                return;
            }

            if (document.Spans.Count() != 1)
            {
                return; //Document has already been tokenized and passed to the sentence detection, so ignore the second call
            }

            var tokens = document.Spans.First().Tokens.ToArray();

            if (tokens.Length == 0)
            {
                return;
            }

            //NOTE: The values read here are discarded on purpose. Reading the property forces each token to cache its
            //      replacement, as it will not be able to retrieve it later when re-added to the document.
            for (int i = 0; i < tokens.Length; i++)
            {
                _ = tokens[i].Replacement;
            }

            var text = document.Value.AsSpan();

            const int padding = 2;

            var paddedTokens = new List<IToken>(tokens.Length + 2 * padding);

            paddedTokens.Add(SpecialToken.BeginToken);
            paddedTokens.Add(SpecialToken.BeginToken);
            paddedTokens.AddRange(tokens);
            paddedTokens.Add(SpecialToken.EndToken);
            paddedTokens.Add(SpecialToken.EndToken);

            int N = paddedTokens.Count;

            var isSentenceEnd = new bool[N];

            for (int i = padding + 1; i < N - padding - 1; i++) //Skip BeginTokens and EndTokens, and first and last token of sentence
            {
                if (paddedTokens[i].ValueAsSpan.IsSentencePunctuation())
                {
                    var features = GetFeatures(paddedTokens, i);
                    isSentenceEnd[i] = PredictTagFromFeatures(features, Data.Weights);
                }
            }

            document.Clear();

            //Now split the original document at the right places

            //If any sentence end was detected within the single span (i.e. ignoring the first and last tokens)
            if (isSentenceEnd.AsSpan().Slice(padding + 1, tokens.Length - 1).IndexOf(true) >= 0)
            {
                int offset = 0; //Index in the text where the next sentence starts
                for (int i = padding; i < N - padding; i++)
                {
                    if (!isSentenceEnd[i])
                    {
                        continue;
                    }

                    int b = offset;
                    int e = tokens[i - padding].End;
                    if (e < b)
                    {
                        continue;
                    }

                    TrimWhitespace(text, ref b, ref e);

                    try
                    {
                        if (!text.Slice(b, e - b + 1).IsNullOrWhiteSpace())
                        {
                            AddSpanWithTokens(b, e);
                        }
                    }
                    catch (Exception)
                    {
                        Logger.LogCritical("Failed to tokenize: b={b} e={e} l={l} offset={offset} tEnd={tEnd} i={i} tCount={tCount}", b, e, text.Length, offset, tokens[i - padding].End, i, tokens.Length);
                        throw;
                    }

                    offset = e + 1;
                }

                //Whatever follows the last detected sentence end becomes a final span
                if (offset <= document.Length - 1)
                {
                    int b = offset;
                    int e = document.Length - 1;

                    TrimWhitespace(text, ref b, ref e);

                    if (!text.Slice(b, e - b + 1).IsNullOrWhiteSpace())
                    {
                        AddSpanWithTokens(b, e);
                    }
                }
            }
            else
            {
                //No sentence end detected: the whole text becomes a single span again
                int b = 0;
                int e = document.Length - 1;

                TrimWhitespace(text, ref b, ref e);

                AddSpanWithTokens(b, e);
            }

            //Moves both boundaries inwards while they sit on whitespace, without letting them cross.
            //The range check runs before the character is read, so the text is never indexed once begin == end.
            void TrimWhitespace(ReadOnlySpan<char> source, ref int begin, ref int end)
            {
                while (begin < end && char.IsWhiteSpace(source[begin]))
                {
                    begin++;
                }

                while (end > begin && char.IsWhiteSpace(source[end]))
                {
                    end--;
                }
            }

            //Adds a span for [begin, end] to the document and re-adds every original token that lies inside it.
            void AddSpanWithTokens(int begin, int end)
            {
                var span = document.AddSpan(begin, end);
                foreach (var t in tokens)
                {
                    if (t.Begin >= span.Begin && t.End <= span.End)
                    {
                        span.AddToken(t); //Re-add the tokens back in the document
                    }
                }
            }
        }