/// <summary>
        /// Predicts the language of <paramref name="document"/> and stores it in
        /// <c>document.Language</c>.
        /// </summary>
        /// <remarks>
        /// Nothing is done when the document is empty, or when its language is already
        /// set to something other than <c>Language.Unknown</c> / <c>Language.Any</c>.
        /// When the document has no spans yet, a temporary copy built from at most its
        /// first 200 characters is tokenized and used for the prediction; the original
        /// document is not tokenized by this method.
        /// </remarks>
        /// <param name="document">The document whose language should be identified.</param>
        public void Process(IDocument document)
        {
            // Nothing to identify in an empty document.
            if (document.Length == 0)
            {
                return;
            }

            // A concrete language is already assigned: leave it untouched.
            if (document.Language != Language.Unknown && document.Language != Language.Any)
            {
                return;
            }

            // Upper bound on how many leading characters are copied into the temporary document.
            const int MaxSampleLength = 200;

            IDocument sample;

            if (document.SpansCount != 0)
            {
                // Already tokenized: predict directly on the caller's document.
                sample = document;
            }
            else
            {
                // Not tokenized yet: tokenize a throw-away copy of the leading text instead.
                var text = document.Length > MaxSampleLength
                    ? document.Value.Substring(0, MaxSampleLength)
                    : document.Value;

                sample = new Document(text);
                Tokenizer.Process(sample);
            }

            // NOTE(review): the second argument is passed through as-is; its meaning is defined by Model.PredictMax.
            var prediction = Model.PredictMax(sample, 200);

            document.Language = Languages.CodeToEnum(prediction.label);
        }
        /// <summary>
        /// Returns a document that is ready for processing: either
        /// <paramref name="document"/> itself when it already has spans, or a temporary
        /// copy that has been tokenized and number-normalized.
        /// </summary>
        /// <remarks>
        /// The temporary copy is built from at most the first 1000 characters of the
        /// document's value. The original document is not passed to the tokenizer or
        /// the number normalizer by this method.
        /// </remarks>
        /// <param name="document">The document to prepare.</param>
        /// <returns>
        /// <paramref name="document"/> when <c>SpansCount</c> is non-zero; otherwise a
        /// newly created, tokenized and number-normalized <c>Document</c>.
        /// </returns>
        private IDocument Prepare(IDocument document)
        {
            // Already tokenized: hand the caller's document back unchanged.
            if (document.SpansCount != 0)
            {
                return document;
            }

            // Upper bound on how many leading characters are copied into the temporary document.
            const int MaxPrefixLength = 1000;

            var text = document.Length > MaxPrefixLength
                ? document.Value.Substring(0, MaxPrefixLength)
                : document.Value;

            // Work on a throw-away copy so the temporary tokenization never touches the original.
            IDocument prepared = new Document(text);
            Tokenizer.Process(prepared);
            NumberNormalizer.Process(prepared);

            return prepared;
        }