/// <summary>
/// Constructor. Builds a page from lines that are already split out and extracts
/// its sections from those lines using the supplied section layout.
/// </summary>
/// <param name="title">Title of page</param>
/// <param name="ns">Namespace of this page</param>
/// <param name="ID">ID for this page</param>
/// <param name="lines">Lines contained in this page (the list is stored as-is, not copied)</param>
/// <param name="sectionLayout">Layout of sections (see section class for documentation)</param>
public Page(
    string title,
    WikiDB.Namespace ns,
    int ID,
    List<string> lines,
    string sectionLayout)
    : this(title, ns, ID)
{
    // hold on to the caller's line list
    this._lines = lines;

    // rebuild the section structure from the lines and the given layout
    this._sections = Section.ExtractSections(lines, null, this, sectionLayout);

    // record that the sections were recovered by this constructor
    this._sectionsRecovered = true;
}
/// <summary>
/// Constructor. Builds a page from raw wiki text: strips irrelevant markup, splits the
/// text into lines, extracts the header and sections, optionally resolves a redirect,
/// processes the remaining markup and accumulates term frequencies over all section lines.
/// </summary>
/// <param name="title">Title of page</param>
/// <param name="ns">Namespace of this page</param>
/// <param name="ID">ID for this page</param>
/// <param name="wikiText">Wiki text for page</param>
/// <param name="database">Database containing this page (used to look up the redirect target)</param>
/// <param name="followRedirection">
/// Whether or not to follow redirection. When true, and the first line starts with
/// "#redirect" (any letter case) and the page has exactly one wiki link, the link's
/// destination is looked up in <paramref name="database"/> and stored as the redirect target.
/// </param>
public Page(string title, WikiDB.Namespace ns, int ID, string wikiText, WikiDB database, bool followRedirection)
    : this(title, ns, ID)
{
    // remove irrelevant markup
    wikiText = WikiMarkup.RemoveIrrelevantMarkup(wikiText);

    // split page up into lines
    _lines = Section.GetLines(wikiText);

    // everything before the first section start is treated as the page header
    int firstSectionStart = Section.GetNextSectionStart(_lines, 0);
    List<string> headerLines = Section.ExtractLines(_lines, 0, firstSectionStart - 1);
    if (headerLines.Count > 0)
    {
        Header h = new Header(headerLines, this);
        _sections.Add(h);
    }

    // get sections
    _sections.AddRange(Section.ExtractSections(_lines, null, this));

    // check for redirect page
    string firstLine = "";
    if (_lines.Count > 0)
    {
        firstLine = _lines[0];
    }

    // The "#redirect" keyword is markup, not natural-language text, so it is matched
    // with an ordinal case-insensitive comparison. A culture-sensitive ToLower() would
    // miss "#REDIRECT" under cultures such as tr-TR, where 'I' lowercases to dotless 'ı'.
    const string redirect = "#redirect";
    if (firstLine.StartsWith(redirect, System.StringComparison.OrdinalIgnoreCase) &&
        WikiLinks.Count == 1 &&
        followRedirection)
    {
        // get redirect page
        string redirectURL = WikiLinks[0].DestPageURL;
        _redirectsTo = database.LookupPage(ns, redirectURL, followRedirection);
    }

    // process markup
    WikiMarkup.ProcessMarkup(_lines);

    // set line information for the page
    SetLineInfo();

    // get TF information
    foreach (Section s in _sections)
    {
        foreach (string line in s.Lines)
        {
            // splitting on single spaces can yield empty tokens; they are skipped below
            string[] tokens = line.Split(' ');
            foreach (string token in tokens)
            {
                // ignore case
                // NOTE(review): ToLower() is culture-sensitive; left unchanged here because
                // other code may build lookup keys the same way - confirm before switching
                // to ToLowerInvariant().
                string lowerToken = token.ToLower().Trim();
                lowerToken = WikiMarkup.TrimPunctuation(lowerToken);

                if (lowerToken == "" || WikiMarkup.IsStopWord(lowerToken, false))
                {
                    continue;
                }

                // first occurrence starts the count at 1, later occurrences increment it
                if (!_termFrequencies.ContainsKey(lowerToken))
                {
                    _termFrequencies[lowerToken] = 1.0F;
                }
                else
                {
                    _termFrequencies[lowerToken] = _termFrequencies[lowerToken] + 1;
                }
            }
        }
    }
}