示例#1
0
        /// <summary>
        /// Flattens a parsed MediaWiki page into a four-element string array:
        /// the page title, its English Wikipedia URL, the raw revision text, and a
        /// cleaned-up summary of the introduction (at most 500 characters followed
        /// by a trailing ".....").
        /// </summary>
        /// <param name="mediaWiki">The parsed page to convert; may be null.</param>
        /// <returns>
        /// The array { title, url, raw text, summary }, or null when
        /// <paramref name="mediaWiki"/> is null.
        /// </returns>
        public String[] Convert(mediawiki mediaWiki)
        {
            // Upper bound on the summary length, not counting the trailing dots.
            const int MaxSummaryLength = 500;

            if (mediaWiki == null)
            {
                return(null);
            }

            List <String> arParts = new List <string>();

            arParts.Add(mediaWiki.page.title);
            arParts.Add(String.Format("http://en.wikipedia.org/wiki/{0}", mediaWiki.page.title));
            arParts.Add(mediaWiki.page.revision.text.Value);

            String text = mediaWiki.page.revision.text.Value;

            // Rewrite every single-line "<...>" span (tags, HTML comments). What each
            // match becomes is decided by MatchEvaluatorSimple, defined elsewhere.
            Regex regex = new Regex(@"<{1}.*?>{1}");

            text = regex.Replace(text, this.MatchEvaluatorSimple);

            // Process only the introduction to save time and space: everything
            // before the first "==" section-header marker.
            int firstHeaderIndex = text.IndexOf("==", StringComparison.Ordinal);

            if (firstHeaderIndex != -1)
            {
                text = text.Substring(0, firstHeaderIndex);
            }

            // Rewrite "{{...}}" templates. The pattern only matches a template with
            // no "{" inside it, so nested templates are handled innermost-first by
            // repeating until nothing matches. The loop ends only once the
            // evaluator's output no longer matches the pattern.
            regex = new Regex(@"{{([^{{])+?}}");
            while (regex.IsMatch(text))
            {
                text = regex.Replace(text, this.MatchEvaluatorSimple);
            }

            // Rewrite parenthesised leftovers that contain no ASCII letter or digit,
            // such as "(, )". The class was previously written "a..zA..Z0..9", which
            // is not a range and made groups like "(born)" match as well.
            regex = new Regex(@"\(([^a-zA-Z0-9])+?\)");
            while (regex.IsMatch(text))
            {
                text = regex.Replace(text, this.MatchEvaluatorSimple);
            }

            // Simplify "[[target|label]]" links (optionally followed by a
            // <nowiki>...</nowiki> suffix) via MatchEvaluator, then strip the
            // bold (''') and italic ('') wiki quote markers.
            regex = new Regex(@"\[\[([^\|\]]+)(\|{0,1})([^\|\]]+)\]\](<nowiki>(.+)?</nowiki>){0,1}");
            text  = regex.Replace(text, this.MatchEvaluator);
            text  = text.Replace("'''", "");
            text  = text.Replace("''", "");

            // Truncate only when the text is too long. The previous
            // Substring(0, Length - 1) fallback threw on empty text and dropped the
            // final character of short text.
            if (text.Length > MaxSummaryLength)
            {
                text = text.Substring(0, MaxSummaryLength);
            }

            text += ".....";
            arParts.Add(text);
            return(arParts.ToArray());
        }
示例#2
0
        /// <summary>
        /// Converts a parsed MediaWiki page into an array of four strings: the page
        /// title, the English Wikipedia URL built from that title, the unmodified
        /// revision text, and a short summary derived from the text that precedes
        /// the first section header (capped at 500 characters, then suffixed with
        /// ".....").
        /// </summary>
        /// <param name="mediaWiki">The parsed page to convert; may be null.</param>
        /// <returns>
        /// The array { title, url, raw text, summary }, or null when
        /// <paramref name="mediaWiki"/> is null.
        /// </returns>
        public String[] Convert(mediawiki mediaWiki)
        {
            // Maximum number of summary characters kept before the trailing dots.
            const int MaxSummaryLength = 500;

            if (mediaWiki == null)
                return null;

            String rawText = mediaWiki.page.revision.text.Value;

            List<String> arParts = new List<string>();
            arParts.Add(mediaWiki.page.title);
            arParts.Add(String.Format("http://en.wikipedia.org/wiki/{0}", mediaWiki.page.title));
            arParts.Add(rawText);

            // Single-line "<...>" spans (tags, HTML comments) are passed to
            // MatchEvaluatorSimple, which is defined elsewhere and decides the
            // replacement text.
            String text = new Regex(@"<{1}.*?>{1}").Replace(rawText, this.MatchEvaluatorSimple);

            // Keep only the introduction, i.e. the text before the first "=="
            // header marker, to save time and space.
            int firstHeaderIndex = text.IndexOf("==", StringComparison.Ordinal);
            if (firstHeaderIndex != -1)
                text = text.Substring(0, firstHeaderIndex);

            // "{{...}}" templates: the pattern rejects any "{" inside the braces, so
            // nested templates are processed from the innermost outwards by looping
            // until no match remains. Termination depends on the evaluator's output
            // no longer matching.
            Regex regex = new Regex(@"{{([^{{])+?}}");
            while (regex.IsMatch(text))
                text = regex.Replace(text, this.MatchEvaluatorSimple);

            // Parenthesised groups with no ASCII letter or digit, such as "(, )".
            // The original class "a..zA..Z0..9" was not a range, so it also matched
            // groups containing ordinary letters like "(born)".
            regex = new Regex(@"\(([^a-zA-Z0-9])+?\)");
            while (regex.IsMatch(text))
                text = regex.Replace(text, this.MatchEvaluatorSimple);

            // "[[target|label]]" links, with an optional <nowiki>...</nowiki>
            // suffix, are rewritten by MatchEvaluator; afterwards the bold (''')
            // and italic ('') wiki quote markers are removed.
            regex = new Regex(@"\[\[([^\|\]]+)(\|{0,1})([^\|\]]+)\]\](<nowiki>(.+)?</nowiki>){0,1}");
            text = regex.Replace(text, this.MatchEvaluator);
            text = text.Replace("'''", "");
            text = text.Replace("''", "");

            // Cut the text only when it exceeds the limit. The earlier
            // "Length - 1" fallback threw ArgumentOutOfRangeException for empty text
            // and removed the last character of any text within the limit.
            if (text.Length > MaxSummaryLength)
                text = text.Substring(0, MaxSummaryLength);

            text += ".....";
            arParts.Add(text);
            return arParts.ToArray();
        }