示例#1
0
        /// <summary>
        /// Creates a <see cref="DocumentParser"/> for an in-memory phrase by
        /// annotating it through <c>StanfordPipelineService</c> and initializing
        /// the resulting parser for parsing.
        /// </summary>
        /// <param name="phrase">The text to annotate. Must be non-null and non-empty.</param>
        /// <returns>
        /// The initialized parser, or <c>null</c> when the pipeline service
        /// returns no annotated document.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="phrase"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="phrase"/> is an empty string.</exception>
        /// <exception cref="Exception">
        /// The Stanford service could not be verified as initialized, or the
        /// parser failed to initialize for parsing.
        /// </exception>
        public static DocumentParser FromPhrase(string phrase)
        {
            // Reject null explicitly; previously this surfaced as a
            // NullReferenceException from phrase.Length.
            if (phrase == null)
            {
                throw new ArgumentNullException(nameof(phrase));
            }

            if (phrase.Length == 0)
            {
                throw new ArgumentException("The phrase is empty!", nameof(phrase));
            }

            // make sure CoreNLP is initialized
            if (!VerifyStanfordServiceIsInitialized())
            {
                throw new Exception("Failed to initialize CoreNLP!");
            }

            StanfordDocumentFacade doc =
                StanfordPipelineService.Instance.Annotate(phrase);

            // No annotated document is reported to the caller as null rather
            // than as an exception.
            if (doc == null)
            {
                return null;
            }

            DocumentParser docParser = new DocumentParser(doc);

            if (!docParser.InitForParsing())
            {
                throw new Exception("Empty phrase?!");
            }

            // #fromphrase
            // Record that this parser was built from a phrase, not from a file.
            docParser._fromFile = false;

            return docParser;
        }
示例#2
0
        //--------------------------------------------------------------------------
        /// <summary>
        /// This is how you construct a new DocumentParser from a file: the file
        /// is annotated through <c>StanfordPipelineService</c>, then the parser's
        /// metadata is parsed, the document is validated, and its Gutenberg Id
        /// is set from the file.
        /// </summary>
        /// <param name="file">The document file to annotate. Must be non-null and exist.</param>
        /// <returns>
        /// The prepared parser, or <c>null</c> when the pipeline service
        /// returns no annotated document.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
        /// <exception cref="FileNotFoundException"><paramref name="file"/> does not exist.</exception>
        /// <exception cref="Exception">
        /// The Stanford service could not be verified as initialized, metadata
        /// parsing failed, the post-metadata validity check failed, or the
        /// Gutenberg Id could not be set.
        /// </exception>
        public static DocumentParser FromFile(FileInfo file)
        {
            // Reject null explicitly; previously this surfaced as a
            // NullReferenceException from file.Exists.
            if (file == null)
            {
                throw new ArgumentNullException(nameof(file));
            }

            if (!file.Exists)
            {
                // Carry the offending path so callers can report which file is missing.
                throw new FileNotFoundException(
                          "The document file doesn't exist!", file.FullName);
            }

            // make sure CoreNLP is initialized
            if (!VerifyStanfordServiceIsInitialized())
            {
                throw new Exception("Failed to initialize CoreNLP!");
            }

            StanfordDocumentFacade doc =
                StanfordPipelineService.Instance.Annotate(file);

            // No annotated document is reported to the caller as null rather
            // than as an exception.
            if (doc == null)
            {
                return null;
            }

            DocumentParser docParser = new DocumentParser(doc);

            // The preparation steps run in a fixed order; each failure aborts
            // with its own message.
            if (!docParser.ParseMetaData())
            {
                throw new Exception("Failed to get document's start mark!");
            }

            if (!docParser.CheckDocumentValidityAfterMetaData())
            {
                throw new Exception(
                          "The document is invalid - either no content or no end mark!");
            }

            if (!docParser.SetGutenbergId(file))
            {
                throw new Exception("Failed setting Gutenberg Id!");
            }

            // Record that this parser was built from a file.
            docParser._fromFile = true;

            return docParser;
        }