/// <summary>
/// Appends every cell record of table <paramref name="NextTableId"/> to the table
/// directly before it, then empties the appended table.
/// </summary>
/// <remarks>
/// Each record is parsed as "tableId,row,column|value". The row number of a moved record is
/// shifted by the row count the preceding table had before the merge, and its table id is
/// rewritten to <c>NextTableId - 1</c>. Second-level child nodes that pointed at the merged
/// table get <c>TableId = -1</c>.
/// </remarks>
/// <param name="doc">Document whose root holds the table list and the node tree.</param>
/// <param name="NextTableId">Id of the table that is folded into table <c>NextTableId - 1</c>.</param>
public static void MergeTable(AnnouceDocument doc, int NextTableId)
{
    var targetId = NextTableId - 1;
    var target = doc.root.TableList[targetId];
    var rowOffset = new HTMLTable(target).RowCount;

    // Re-home the records of the second table onto the end of the first one.
    foreach (var record in doc.root.TableList[NextTableId])
    {
        var parts = record.Split("|");
        var position = parts[0].Split(",");
        var cellValue = parts[1];
        var shiftedRow = rowOffset + int.Parse(position[1]);
        target.Add($"{targetId},{shiftedRow},{position[2]}|{cellValue}");
    }
    doc.root.TableList[NextTableId].Clear();

    // Detach the nodes that referenced the table that has just been emptied.
    var sections = doc.root.Children;
    for (var s = 0; s < sections.Count; s++)
    {
        var nodes = sections[s].Children;
        for (var n = 0; n < nodes.Count; n++)
        {
            var node = nodes[n];
            if (node.TableId != NextTableId)
            {
                continue;
            }
            node.TableId = -1;
        }
    }
}
/// <summary>
/// Re-reads the "ContentPlaceHolder1_GridViewSessions" table from the timesheet document,
/// adds every row that <c>parseRow</c> can turn into a booking to <c>currentBookings</c>
/// and to the list view, then re-enables the month buttons and hides the loading text.
/// </summary>
private void populateListView()
{
    bookingsTable = (HTMLTable)timesheet.all.item("ContentPlaceHolder1_GridViewSessions", 0);

    foreach (var section in bookingsTable.all)
    {
        foreach (var element in section.all)
        {
            if (!(element is HTMLTableRow))
            {
                continue;
            }

            OvertimeBooking booking = parseRow(element);
            if (booking == null)
            {
                // parseRow returned nothing for this row; leave it out of the list.
                continue;
            }

            currentBookings.Add(booking);

            string[] columns =
            {
                booking.date,
                booking.projectNo,
                booking.description,
                booking.hours,
                booking.multiplier
            };
            listView1.Items.Add(new ListViewItem(columns));
        }
    }

    AdjustDescColumnToFill(listView1);
    thisMonthButton.Enabled = true;
    lastMonthButton.Enabled = true;
    loadingText.Visible = false;
}
/// <summary>
/// For every table under <paramref name="root"/>, finds the body cells whose normalized text
/// equals the normalized <paramref name="KeyWord"/> and counts the header (row 1) text of the
/// column they sit in. Counts accumulate in <c>TrainingTitleResult</c>.
/// </summary>
/// <param name="root">Root node whose <c>TableList</c> is scanned.</param>
/// <param name="KeyWord">Cell text to look for; compared after <c>NormalizeKey()</c>.</param>
public static void PutTrainingItem(HTMLEngine.MyRootHtmlNode root, string KeyWord)
{
    foreach (var entry in root.TableList)
    {
        var grid = new HTMLTable(entry.Value);

        // Row 1 is the header, so scanning starts at row 2.
        // NOTE(review): both loops stop before RowCount / ColumnCount, so the last row and the
        // last column are never inspected - confirm this is intended.
        for (int row = 2; row < grid.RowCount; row++)
        {
            for (int col = 1; col < grid.ColumnCount; col++)
            {
                var isHit = grid.CellValue(row, col).NormalizeKey().Equals(KeyWord.NormalizeKey());
                if (!isHit)
                {
                    continue;
                }

                var header = grid.CellValue(1, col);
                if (TrainingTitleResult.TryGetValue(header, out var seen))
                {
                    TrainingTitleResult[header] = seen + 1;
                }
                else
                {
                    TrainingTitleResult.Add(header, 1);
                }
            }
        }
    }
}
/// <summary>
/// Runs a search for the HKID carried by <paramref name="wl"/> and, if <c>SelectCase</c>
/// accepts the record's ERID, opens the option page with <paramref name="newTrustee"/>.
/// </summary>
/// <param name="wl">Row supplying the <c>HKID</c> to search for and the <c>ERID</c> to select.</param>
/// <param name="newTrustee">Value forwarded to <c>SetOptionPage</c>.</param>
/// <exception cref="Exception">The search page exposes no result table.</exception>
public void SetOption(ExcelWhlist wl, string newTrustee)
{
    // The blank search page only has to be dismissed when it actually appears.
    if (blankSearch.WaitForCreate())
    {
        blankSearch.btnSearch.click();
    }
    WaitForWebPage(spage);

    spage.txtHKID.value = wl.HKID;
    spage.btnsearch.click();
    WaitForWebPage(spage);

    HTMLTable table = spage.tblSearchResult;
    if (table == null)
    {
        throw new Exception("This search has no return result.");
    }

    // SelectCase decides whether processing continues for this ERID.
    if (SelectCase(wl.ERID))
    {
        SetOptionPage(newTrustee);
    }
}
/// <summary>
/// Counts, per column header, how many body cells match <paramref name="KeyWord"/>.
/// </summary>
/// <remarks>
/// The header is the row-1 cell with spaces removed; columns with an empty header are skipped.
/// When <c>Transform</c> is set, the cell text is passed through it (together with the header)
/// before comparison. Both sides are compared after <c>NormalizeTextResult()</c>.
/// Counts accumulate in <c>TrainingTitleResult</c>.
/// </remarks>
/// <param name="root">Root node whose <c>TableList</c> is scanned.</param>
/// <param name="KeyWord">Cell text to look for.</param>
public void PutTitleTrainingItem(HTMLEngine.MyRootHtmlNode root, string KeyWord)
{
    foreach (var entry in root.TableList)
    {
        var grid = new HTMLTable(entry.Value);

        // Row 1 is the header, so scanning starts at row 2.
        for (int row = 2; row < grid.RowCount; row++)
        {
            for (int col = 1; col < grid.ColumnCount; col++)
            {
                var header = grid.CellValue(1, col).Replace(" ", "");
                if (String.IsNullOrEmpty(header))
                {
                    continue;
                }

                var cellText = grid.CellValue(row, col);
                if (Transform != null)
                {
                    cellText = Transform(cellText, header);
                }

                if (!cellText.NormalizeTextResult().Equals(KeyWord.NormalizeTextResult()))
                {
                    continue;
                }

                if (TrainingTitleResult.TryGetValue(header, out var seen))
                {
                    TrainingTitleResult[header] = seen + 1;
                }
                else
                {
                    TrainingTitleResult.Add(header, 1);
                }
            }
        }
    }
}
/// <summary>
/// Extracts one <c>struIncreaseStock</c> per table row found by four header rules
/// (subscriber, issued quantity, issued amount, lock-up period).
/// </summary>
/// <param name="root">Root node handed to <c>HTMLTable.GetMultiInfo</c>.</param>
/// <param name="SampleincreaseStock">Template whose <c>id</c> and <c>BuyMethod</c> are copied onto every result.</param>
/// <returns>The extracted records; rows with an empty subscriber are dropped.</returns>
static List <struIncreaseStock> GetMultiTarget(HTMLEngine.MyRootHtmlNode root, struIncreaseStock SampleincreaseStock)
{
    // The position of a rule in this list is the index used to read the result row below.
    var rules = new List <TableSearchRule>
    {
        new TableSearchRule
        {
            Name = "认购对象",
            Rule = new List <string> { "发行对象", "认购对象", "发行对象名称" },
            IsEq = true
        },
        new TableSearchRule
        {
            Name = "增发数量",
            Rule = new List <string> { "配售股数", "认购数量", "认购股份数" },
            IsEq = false, // a header that merely contains the keyword is enough
            Normalize = Normalizer.NormalizerStockNumber
        },
        new TableSearchRule
        {
            Name = "增发金额",
            Rule = new List <string> { "配售金额", "认购金额" },
            IsEq = false, // a header that merely contains the keyword is enough
            Normalize = Normalizer.NormalizerMoney
        },
        new TableSearchRule
        {
            Name = "锁定期",
            Rule = new List <string> { "锁定期", "限售期" },
            IsEq = false, // a header that merely contains the keyword is enough
            Normalize = NormalizerFreezeYear
        }
    };

    var extracted = new List <struIncreaseStock>();
    foreach (var row in HTMLTable.GetMultiInfo(root, rules, true))
    {
        var record = new struIncreaseStock
        {
            id = SampleincreaseStock.id,
            BuyMethod = SampleincreaseStock.BuyMethod,
            PublishTarget = row[0].RawData
        };
        if (String.IsNullOrEmpty(record.PublishTarget))
        {
            // A row without a subscriber is not a usable record.
            continue;
        }

        record.IncreaseNumber = row[1].RawData;
        record.IncreaseMoney = row[2].RawData;
        record.FreezeYear = row[3].RawData;
        extracted.Add(record);
    }
    return extracted;
}
/// <summary>
/// Fills <c>PersonList</c> from two sources: counterparties found in tables via the
/// <c>TradeCompany</c> title rule, and names listed in explanation entries that mention
/// natural persons ("自然人").
/// </summary>
/// <remarks>
/// Sets <c>TradeCompany.IsRequire</c> to true as a side effect. Table rows that belong to an
/// explanation table (<c>ExplainTableId</c>) or whose text contains "不超过" are ignored.
/// </remarks>
void GetPersonList()
{
    // Counterparties taken from tables.
    TradeCompany.IsRequire = true;
    var Rules = new List <TableSearchTitleRule> { TradeCompany };
    var opt = new HTMLTable.SearchOption();
    opt.IsMeger = true;
    opt.IsContainTotalRow = true;
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, opt);
    if (result.Count != 0)
    {
        // "Counterparty" is also an entry of the explanation table and gets mistaken for a
        // header there, so rows from explanation tables are filtered out.
        // TODO: counterparties should only be taken from tables near the start of the document.
        var TableTrades = result.Where(z => !ExplainTableId.Contains(z[0].TableId))
                          .Select(x => x[0].RawData)
                          .Where(y => !y.Contains("不超过")).ToList();
        PersonList.AddRange(TableTrades);
    }

    // Names taken from explanation entries that talk about natural persons.
    foreach (var e in ExplainDict)
    {
        if (!e.Value.Contains("自然人"))
        {
            continue;
        }

        foreach (var person in e.Value.Split(Utility.SplitChar))
        {
            // Cut the name off at "等" and then at "自然人"; a name containing neither marker
            // passes through unchanged.
            var trimPerson = person;
            if (trimPerson.Contains("等"))
            {
                trimPerson = Utility.GetStringBefore(trimPerson, "等");
            }
            if (trimPerson.Contains("自然人"))
            {
                trimPerson = Utility.GetStringBefore(trimPerson, "自然人");
            }
            PersonList.Add(trimPerson);
        }
    }
}
/// <summary>
/// Navigates <paramref name="ie"/> to the query page of the current group and rebuilds
/// <c>servers</c> from the table with id "query_table". Does nothing for the group "PPE".
/// </summary>
/// <remarks>
/// For each data row (row 0 is skipped) the text of cell 0 is paired with every
/// comma-separated entry of cell 4. Any exception is written to the console; in all cases
/// <c>updateDate</c> is refreshed and <c>OnUpdated</c> is raised afterwards.
/// </remarks>
/// <param name="ie">Browser instance used for the navigation.</param>
/// <param name="visible">Value assigned to <c>ie.Visible</c>.</param>
public void GetServersFromWeb(InternetExplorer ie, bool visible)
{
    if (groupName.Equals("PPE"))
    {
        return;
    }

    try
    {
        object Empty = 0;
        object URL = Index.CreateInstance().temcurl + "?query=ON&group=" + groupName;
        ie.Visible = visible;
        ie.Navigate2(ref URL, ref Empty, ref Empty, ref Empty, ref Empty);

        // Fixed initial wait, then poll until the browser stops reporting busy.
        System.Threading.Thread.Sleep(5000);
        while (ie.Busy)
        {
            System.Threading.Thread.Sleep(1000);
        }

        IHTMLDocument3 document = (IHTMLDocument3)ie.Document;
        if (document == null)
        {
            return;
        }

        HTMLTable queryTable = (HTMLTable)document.getElementById("query_table");
        if (queryTable == null || queryTable.rows == null || queryTable.rows.length <= 1)
        {
            return;
        }

        servers = new List <Server>();
        for (int rowIndex = 1; rowIndex < queryTable.rows.length; rowIndex++)
        {
            HTMLTableRow row = (HTMLTableRow)queryTable.rows.item(rowIndex, rowIndex);
            if (row == null || row.cells == null || row.cells.length <= 4)
            {
                continue;
            }

            HTMLTableCell serverCell = (HTMLTableCell)row.cells.item(0, 0);
            HTMLTableCell travelServerCell = (HTMLTableCell)row.cells.item(4, 4);
            if (serverCell == null || serverCell.innerText == null || serverCell.innerText.Equals("")
                || travelServerCell == null || travelServerCell.innerText == null || travelServerCell.innerText.Equals(""))
            {
                continue;
            }

            foreach (string travelServer in travelServerCell.innerText.Split(new char[] { ',' }))
            {
                servers.Add(new Server(serverCell.innerText, travelServer));
            }
        }
    }
    catch (Exception exception)
    {
        Console.WriteLine(exception.Message);
    }
    finally
    {
        updateDate = DateTime.Now;
        OnUpdated(EventArgs.Empty);
    }
}
/// <summary>
/// Single-row merge: whenever a table consists of exactly one row and has the same column
/// count as the table that follows it, the following table is merged into it.
/// </summary>
/// <param name="doc">Document whose tables are inspected and merged in place.</param>
private static void OneRowFix(AnnouceDocument doc)
{
    for (int tableId = 2; tableId <= doc.root.TableList.Count; tableId++)
    {
        var previous = new HTMLTable(doc.root.TableList[tableId - 1]);
        var current = new HTMLTable(doc.root.TableList[tableId]);

        if (previous.RowCount != 1 || previous.ColumnCount != current.ColumnCount)
        {
            continue;
        }
        MergeTable(doc, tableId);
    }
}
/// <summary>
/// Opens the configured URL, walks Home -> Today's Deals -> Under 15, then reads the deals
/// list as a table and clicks an element in it.
/// </summary>
public void TableTest()
{
    var browser = Drivers.Browser;
    browser.NavigateToUrl(ConfigurationManager.AppSettings["URL"].ToString());

    CurrentPage = GetInstance <AmazonHome>();
    CurrentPage = CurrentPage.As <AmazonHome>().ClickTodaysDealsButton();
    CurrentPage = CurrentPage.As <TodaysDealsPage>().ClickUnder15Link();

    // Example of reading table elements from the page.
    var dealsPage = CurrentPage.As <TodaysDealsPage>();
    var dealsList = dealsPage.GetTodaysDealsList();
    HTMLTable.ReadTable(dealsList);
    HTMLTable.ClickTableElement("0", "test", "test", "Submit");
}
/// <summary>
/// Once the timesheet page has finished loading, snapshots the sessions table's inner HTML,
/// triggers the month selection on the page and starts <c>timer1</c>.
/// </summary>
/// <remarks>
/// When <c>thisMonth</c> is false, every anchor titled "Go to the previous month" is clicked;
/// otherwise the radio button "ContentPlaceHolder1_RadioButtonListSessions_2" is clicked.
/// Any exception results in a message box instead of propagating.
/// </remarks>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    try
    {
        if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
        {
            return;
        }
        if (!(webBrowser1.Url.ToString() == "http://intranet.cougarautomation.net/Timesheet/Timesheet.aspx"))
        {
            return;
        }

        Web_V1 = (SHDocVw.WebBrowser_V1)webBrowser1.ActiveXInstance;
        timesheet = (HTMLDocument)Web_V1.Document;
        bookingsTable = (HTMLTable)timesheet.all.item("ContentPlaceHolder1_GridViewSessions", 0);

        // Remember the current table markup; an empty string stands in for a missing table.
        bookingsTableInnerHTML = bookingsTable != null ? bookingsTable.innerHTML : "";

        if (thisMonth)
        {
            HTMLInputElement monthRadioButton = (HTMLInputElement)timesheet.all.item("ContentPlaceHolder1_RadioButtonListSessions_2", 0);
            monthRadioButton.click();
        }
        else
        {
            foreach (HTMLAnchorElement anchor in timesheet.getElementsByTagName("a"))
            {
                if (anchor.title == "Go to the previous month")
                {
                    anchor.click();
                }
            }
        }

        timer1.Start();
    }
    catch (Exception)
    {
        MessageBox.Show("Unable to connect to timesheet page, please check your connection");
    }
}
/// <summary>
/// Collects the rows, across all tables, that have at least one cell matching the rule's
/// content list.
/// </summary>
/// <remarks>
/// A cell is compared with spaces removed. In equality mode (<c>rule.IsContentEq</c>) a rule
/// entry must equal the cell; otherwise the rule entry must contain the cell text.
/// Each matching row is added once, converted with the table's first row as header.
/// NOTE(review): in containment mode the test is <c>content.Contains(cell)</c>, so an empty
/// cell matches every entry - confirm the direction is intended.
/// NOTE(review): the row loop stops before <c>RowCount</c>, so the last row is never checked.
/// </remarks>
/// <param name="root">Root node whose <c>TableList</c> is scanned (tables are read by ids 1..Count).</param>
/// <param name="rule">Content entries and comparison mode; a null <c>Content</c> matches nothing.</param>
/// <returns>One cell-info list per matching row.</returns>
public static List <List <CellInfo> > GetMultiRowsByContentRule(HTMLEngine.MyRootHtmlNode root, TableSearchContentRule rule)
{
    var matches = new List <List <CellInfo> >();

    for (int tableNo = 1; tableNo <= root.TableList.Count; tableNo++)
    {
        var table = new HTMLTable(root.TableList[tableNo]);
        var header = table.GetRow(1);

        for (int rowNo = 1; rowNo < table.RowCount; rowNo++)
        {
            var row = table.GetRow(rowNo);

            foreach (var cell in row)
            {
                if (rule.Content == null)
                {
                    continue;
                }

                var hit = false;
                foreach (var content in rule.Content)
                {
                    hit = rule.IsContentEq
                        ? content.Equals(cell.Replace(" ", ""))    // equality mode
                        : content.Contains(cell.Replace(" ", "")); // containment mode
                    if (hit)
                    {
                        break;
                    }
                }

                if (!hit)
                {
                    continue;
                }

                // One matching cell is enough; record the row and move on to the next row.
                matches.Add(ConvertRowToCellInfo(row, tableNo, rowNo, header));
                break;
            }
        }
    }

    return matches;
}
/// <summary>
/// Converts every &lt;table&gt; element of <c>doc3</c> into a <see cref="DataTable"/>.
/// </summary>
/// <remarks>
/// Columns are added on demand as wider rows are met. Cell content is taken from
/// <c>innerText</c> for <c>TextFormats.Text</c> and from <c>innerHTML</c> for
/// <c>TextFormats.HTML</c>; for any other format the cells are left unassigned.
/// </remarks>
/// <param name="format">Selects which cell property is copied.</param>
/// <returns>One data table per HTML table; empty when the document yields no collection.</returns>
private List <DataTable> GetTables(TextFormats format)
{
    IHTMLElementCollection tableCollection = this.doc3.getElementsByTagName("table");
    List <DataTable> tables = new List <DataTable>();
    if (tableCollection == null)
    {
        return(tables);
    }

    foreach (var candidate in tableCollection)
    {
        HTMLTable htmlTable = candidate as HTMLTable;
        if (htmlTable == null)
        {
            continue;
        }

        DataTable data = new DataTable();
        foreach (HTMLTableRow htmlRow in htmlTable.rows)
        {
            DataRow dataRow = data.NewRow();
            for (int col = 0; col < htmlRow.cells.length; col++)
            {
                // Grow the schema when this row is wider than any row seen so far.
                if (data.Columns.Count <= col)
                {
                    data.Columns.Add();
                }

                HTMLTableCell cell = htmlRow.cells.item(col, col) as HTMLTableCell;
                if (format == TextFormats.Text)
                {
                    dataRow[col] = cell.innerText;
                }
                if (format == TextFormats.HTML)
                {
                    dataRow[col] = cell.innerHTML;
                }
            }
            data.Rows.Add(dataRow);
        }
        tables.Add(data);
    }

    return(tables);
}
/// <summary>
/// Polls the sessions table: fetches it if it is not known yet, otherwise stops the timer and
/// repopulates the list view as soon as the table's inner HTML differs from the snapshot.
/// </summary>
private void timer1_Tick(object sender, EventArgs e)
{
    if (bookingsTable == null)
    {
        // Nothing to compare against yet; try again on the next tick.
        bookingsTable = (HTMLTable)timesheet.all.item("ContentPlaceHolder1_GridViewSessions", 0);
        return;
    }

    if (bookingsTableInnerHTML == bookingsTable.innerHTML)
    {
        return;
    }

    timer1.Stop();
    populateListView();
}
/// <summary>
/// Clicks the first pagination tab whose text equals <paramref name="tabText"/> and re-parses
/// the page's table into <c>TableStaffDataParsed</c>.
/// </summary>
/// <param name="tabText">Exact text of the tab to open.</param>
/// <exception cref="NoSuchElementException">No tab carries that text (also written to the log).</exception>
public void NavigateToTableTab(string tabText)
{
    foreach (IWebElement candidate in ListPagination)
    {
        if (!(candidate.Text == tabText))
        {
            continue;
        }

        candidate.Click();
        TableStaffDataParsed = WebElementExtensions.ParseHTMLTableByXPath(this.PageSource, "//table");
        return;
    }

    // Falling out of the loop means no tab matched.
    LogHelpers.WriteToLog($"[ERROR] :: Table Tab Not Found: {tabText}");
    throw new NoSuchElementException($"Table Tab Not Found: {tabText}");
}
/// <summary>
/// Looks up table rows containing one of four trading-method keywords (exact cell match).
/// The per-row extraction is not implemented yet, so the returned list is always empty.
/// </summary>
/// <returns>An empty list of stock changes.</returns>
List <struStockChange> ExtractFromTableByContent()
{
    var rule = new TableSearchContentRule
    {
        Content = new List <string> { "集中竞价交易", "竞价交易", "大宗交易", "约定式购回" },
        IsContentEq = true
    };
    var matchedRows = HTMLTable.GetMultiRowsByContentRule(root, rule);

    var changes = new List <struStockChange>();
    foreach (var row in matchedRows)
    {
        // TODO: concrete extraction logic
    }
    return(changes);
}
/// <summary>
/// First-row-null merge: merges a table into its predecessor when the table's first row
/// contains the null placeholder (<c>strNullValue</c>), both tables have the same column
/// count, and the nodes anchoring the two tables are at most 200 positions apart.
/// </summary>
/// <remarks>
/// The anchor positions depend only on the table ids, so they are resolved once per table
/// pair rather than once per cell record.
/// </remarks>
/// <param name="doc">Document whose tables are inspected and merged in place.</param>
private static void FirstRowNullFix(AnnouceDocument doc)
{
    // Tables whose anchor nodes are further apart than this are not merged.
    const int MaxPositionGap = 200;

    for (int NextTableId = 2; NextTableId <= doc.root.TableList.Count; NextTableId++)
    {
        var records = doc.root.TableList[NextTableId];

        // Locate the nodes that carry the two tables; the last matching node wins.
        var FirstTablePos = -1;
        var SecondTablePos = -1;
        foreach (var p in doc.root.Children)
        {
            foreach (var s in p.Children)
            {
                if (s.TableId == NextTableId - 1)
                {
                    FirstTablePos = s.PositionId;
                }
                if (s.TableId == NextTableId)
                {
                    SecondTablePos = s.PositionId;
                }
            }
        }
        if (SecondTablePos - FirstTablePos > MaxPositionGap)
        {
            continue;
        }

        foreach (var item in records)
        {
            // Record layout: "tableId,row,column|value".
            var tablerec = item.Split("|");
            var pos = tablerec[0].Split(",");
            var value = tablerec[1];
            var row = int.Parse(pos[1]);

            // Only a null placeholder in the first row of the second table triggers a merge.
            if (row != 1 || value != strNullValue)
            {
                continue;
            }

            var table = new HTMLTable(doc.root.TableList[NextTableId - 1]);
            var nexttable = new HTMLTable(doc.root.TableList[NextTableId]);
            if (table.ColumnCount != nexttable.ColumnCount)
            {
                // Neither table changes while scanning, so no later record can pass this test.
                break;
            }

            MergeTable(doc, NextTableId);
            Console.WriteLine("FirstRowNullFix");
            // MergeTable cleared the list being enumerated; stop before touching it again.
            break;
        }
    }
}
/// <summary>
/// Scans every table for cells equal to "合计持有股份" and, for each hit, records the holder
/// name (column 1), the holding count (column 5) and the holding percentage (column 6) of
/// that row.
/// </summary>
/// <remarks>
/// The count is multiplied by 10,000 when the cell at row 2, column 5 contains "万".
/// The percentage is multiplied by 0.01. Numbers are extracted with a '.'-decimal pattern and
/// are therefore parsed and formatted with the invariant culture, so the result does not
/// depend on the machine's regional settings.
/// </remarks>
/// <param name="root">Root node whose <c>TableList</c> is scanned.</param>
/// <returns>One entry per matching cell, with <c>Used</c> set to false.</returns>
static List <struHoldAfter> GetHolderAfter(MyRootHtmlNode root)
{
    var HoldList = new List <struHoldAfter>();
    var numberPattern = new Regex(@"\d+\.?\d*");
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    foreach (var table in root.TableList)
    {
        var mt = new HTMLTable(table.Value);

        // Cells are addressed 1-based.
        for (int row = 1; row <= mt.RowCount; row++)
        {
            for (int col = 1; col <= mt.ColumnCount; col++)
            {
                if (mt.CellValue(row, col) != "合计持有股份")
                {
                    continue;
                }

                var HolderName = mt.CellValue(row, 1);

                var strHolderCnt = Normalizer.NormalizeNumberResult(mt.CellValue(row, 5));
                var countText = numberPattern.Match(strHolderCnt).Value;
                var HolderCnt = "";
                if (!String.IsNullOrEmpty(countText))
                {
                    // A "万" in the cell at row 2, column 5 means the column is in units of 10,000.
                    HolderCnt = mt.CellValue(2, 5).Contains("万")
                        ? (double.Parse(countText, invariant) * 10_000).ToString(invariant)
                        : countText;
                }

                var percentText = numberPattern.Match(mt.CellValue(row, 6)).Value;
                var HodlerPercent = "";
                if (!String.IsNullOrEmpty(percentText))
                {
                    HodlerPercent = (double.Parse(percentText, invariant) * 0.01).ToString(invariant);
                }

                HoldList.Add(new struHoldAfter()
                {
                    Name = HolderName,
                    Count = HolderCnt,
                    Percent = HodlerPercent,
                    Used = false
                });
            }
        }
    }
    return(HoldList);
}
/// <summary>
/// Asserts that the parsed table of the current <c>TablesPage</c> has a body row whose first
/// cell text is <paramref name="name"/> and whose second cell text is <paramref name="position"/>.
/// </summary>
public void Then_STAFF_MEMBER_SHOULD_HAVE_POSITION(string name, string position)
{
    HTMLTable parsed = _parallelTestExecution.CurrentPage.AsPage <TablesPage>().GetParsedTable();

    bool found = false;
    foreach (Row bodyRow in parsed.Body)
    {
        // TODO: look cells up by parsed header name instead of by fixed index.
        if (!(bodyRow.Cells[0].Self.InnerText == name))
        {
            continue;
        }
        if (!(bodyRow.Cells[1].Self.InnerText == position))
        {
            continue;
        }
        found = true;
    }

    Assert.IsTrue(found);
}
public static HTMLTable ParseHTMLTableByXPath(string pageSource, string tableXPath) { // Generate Virtual Document From Web Page Source HtmlDocument document = new HtmlDocument(); document.LoadHtml(pageSource); // Instantiate A New HTMLTable Object HTMLTable HTMLTable = new HTMLTable(); // Get The List Of Table Sections (Footer Not Included In This Implementation) var sections = from table in document.DocumentNode.SelectNodes(tableXPath).Cast <HtmlNode>() from section in table.SelectNodes("thead|tbody").Cast <HtmlNode>() select section; foreach (var section in sections) { // Get The List Of Rows In Each Table Section var rows = from row in section.SelectNodes("tr").Cast <HtmlNode>() select row; for (var rowIndex = 0; rowIndex < rows.Count(); rowIndex++) { var currentRow = rows.ToList()[rowIndex]; // Instantiate A New 1-Indexed Row Object (The First Row Will Have Index 1, For Consistency With Dataset Dimensions) Row row = new Row() { Index = rowIndex + 1 }; // Get The List Of Cells In Each Section Row var cells = from cell in currentRow.SelectNodes("th|td").Cast <HtmlNode>() select cell; // Instantiate All Cells And Add Them To The Current Row for (var columnIndex = 0; columnIndex < cells.Count(); columnIndex++) { var currentCell = cells.ToList()[columnIndex]; // Cells Are 1-Indexed (The First Cell Will Have Index (1, 1), For Consistency With Dataset Dimensions) row.Cells.Add(new Cell() { Index = (rowIndex + 1, columnIndex + 1), ColumnIndex = columnIndex + 1, Self = currentCell });
/// <summary>
/// Renders the articles, videos, source and audio collections as HTML tables, substitutes
/// them for the matching placeholders of the index template, and writes the page plus the
/// stylesheet to the output directory.
/// </summary>
private void createHTMLToolStripMenuItem_Click(object sender, EventArgs e)
{
    var stylesheet = File.ReadAllText(Data.TemplateDir + "/style.css");
    var page = File.ReadAllText(Data.TemplateDir + "/index.html");

    // The "Date" column definition is shared by the articles, videos and audio tables.
    var articleColumn = new HTMLTableColumn("Article / Interview", "width: 55%");
    var dateColumn = new HTMLTableColumn("Date");
    var publisherColumn = new HTMLTableColumn("Publisher");
    var sourcesColumn = new HTMLTableColumn("Sources / Mirrors");

    var articlesTable = new HTMLTable(articleColumn, dateColumn, publisherColumn, sourcesColumn);
    foreach (var article in articles)
    {
        articlesTable.Row(HTML.Link(article.title, article.primarySource), article.date, article.publisher ?? "", HTML.LinksString(article.sources));
    }
    page = page.Replace("<!--ARTICLES_TABLE-->", articlesTable.ToString());

    var videosTable = new HTMLTable(new HTMLTableColumn("Video", "width: 80%"), dateColumn);
    foreach (var video in videos)
    {
        videosTable.Row(HTML.Link(video.title, video.primarySource), video.dateHTMLLinks);
    }
    page = page.Replace("<!--VIDEOS_TABLE-->", videosTable.ToString());

    var sourceTable = new HTMLTable(new HTMLTableColumn("Game", "width: 80%"), new HTMLTableColumn("Year"));
    foreach (var entry in source)
    {
        sourceTable.Row(HTML.Link(entry.game, entry.url), entry.year);
    }
    page = page.Replace("<!--SOURCE_TABLE-->", sourceTable.ToString());

    var audioTable = new HTMLTable(new HTMLTableColumn("Audio", "width: 80%"), dateColumn);
    foreach (var clip in audio)
    {
        audioTable.Row(HTML.Link(clip.title, clip.primarySource), clip.dateHTMLLinks);
    }
    page = page.Replace("<!--AUDIO_TABLE-->", audioTable.ToString());

    File.WriteAllText(Data.OutputDir + "/index.html", page);
    File.WriteAllText(Data.OutputDir + "/style.css", stylesheet);
}
/// <summary>
/// Returns every cell with class name "subjectCell " (trailing space included) from the
/// table with class "tracklist withimages".
/// </summary>
/// <param name="w">Helper used to locate the table element.</param>
/// <returns>The matching cells; empty when the table is not found.</returns>
public static List <IHTMLElement> getRecentTracks(WebUtil w)
{
    List <IHTMLElement> subjectCells = new List <IHTMLElement>();

    HTMLTable tracksTable = w.getElementByClassName("table", "tracklist withimages") as HTMLTable;
    if (tracksTable == null)
    {
        return(subjectCells);
    }

    foreach (IHTMLTableRow trackRow in tracksTable.rows)
    {
        foreach (IHTMLElement cell in trackRow.cells)
        {
            if (!(cell.className == "subjectCell "))
            {
                continue;
            }
            subjectCells.Add(cell);
        }
    }
    return(subjectCells);
}
/// <summary>
/// Builds the picture-library table: one linked folder icon per directory followed by one
/// linked thumbnail per file, laid out in <paramref name="numberOfColumns"/> columns.
/// </summary>
/// <remarks>
/// Paths are combined with <c>fbrs.BaseDirectory</c> and Base64-encoded before being placed
/// in the link query strings. <paramref name="pageNumber"/>, <paramref name="itemsPerPage"/>
/// and <paramref name="showThumbnails"/> are not read by this method.
/// </remarks>
/// <returns>The HTML produced by <c>HTMLTable.HTMLTableWithCellContents</c>.</returns>
static string HTMLTableForPicturesLibrary(FileBrowseResult fbrs, int numberOfColumns, int pageNumber, int itemsPerPage, bool showThumbnails)
{
    List <string> cells = new List <string>();

    // Folders first: icon, line break, folder name - all wrapped in a browse link.
    foreach (BrowseItem folder in fbrs.Directories)
    {
        string encodedFolder = Functions.EncodeToBase64(Path.Combine(fbrs.BaseDirectory, folder.Name));
        HTMLImage folderIcon = new HTMLImage("/static/images/imgFolder150x75.png", "folderpic");
        string label = "" + folderIcon.ToString() + "<br />" + Path.GetFileName(folder.Name);
        HTMLLink browseLink = new HTMLLink("browsepics?PATH=" + encodedFolder, label);
        cells.Add(browseLink.ToString());
    }

    // Then files: a medium thumbnail linking to the extra-large view.
    foreach (BrowseItem file in fbrs.Files)
    {
        string encodedFile = Functions.EncodeToBase64(Path.Combine(fbrs.BaseDirectory, file.Name));
        HTMLImage thumbnail = new HTMLImage("getfilethumbnail64?filename=" + encodedFile + "&size=medium", "thumbnail");
        HTMLLink viewLink = new HTMLLink("viewpic?FN=" + encodedFile + "&size=extralarge", thumbnail.ToString());
        cells.Add(viewLink.ToString());
    }

    return(HTMLTable.HTMLTableWithCellContents("picturelibrarytable", numberOfColumns, cells));
}
/// <summary>
/// Collects post-change holdings from every table row that contains a cell equal to
/// "合计持有股份" or "合计持股"; falls back to the second and then the third extraction step
/// when nothing is found.
/// </summary>
/// <remarks>
/// For a matching row the holder name is read from column 1, the count from the
/// second-to-last column and the percentage from the last column. The cell at row 2,
/// column 5 is passed to <c>getAfterstock</c> together with the normalized count.
/// </remarks>
/// <returns>The holdings found, each with <c>Used</c> set to false by this step.</returns>
List <struHoldAfter> GetHolderAfter()
{
    var holders = new List <struHoldAfter>();

    foreach (var entry in root.TableList)
    {
        var grid = new HTMLTable(entry.Value);

        // Cells are addressed 1-based.
        for (int row = 1; row <= grid.RowCount; row++)
        {
            for (int col = 1; col <= grid.ColumnCount; col++)
            {
                var label = grid.CellValue(row, col);
                if (label != "合计持有股份" && label != "合计持股")
                {
                    continue;
                }

                var holderName = grid.CellValue(row, 1);
                var countText = Normalizer.NormalizeNumberResult(grid.CellValue(row, grid.ColumnCount - 1));
                var unitTitle = grid.CellValue(2, 5);
                string holderCount = getAfterstock(unitTitle, countText);
                var holderPercent = getAfterpercent(grid.CellValue(row, grid.ColumnCount));

                holders.Add(new struHoldAfter()
                {
                    Name = holderName,
                    Count = holderCount,
                    Percent = holderPercent,
                    Used = false
                });
            }
        }
    }

    // Fallback chain: each later step only runs when the earlier ones found nothing.
    if (holders.Count == 0)
    {
        holders = GetHolderAfter2ndStep();
    }
    if (holders.Count == 0)
    {
        holders = GetHolderAfter3rdStep();
    }
    return(holders);
}
/// <summary>
/// Serializes <paramref name="Table"/> as an HTML &lt;table&gt;: one header row built from
/// <c>ColumnNames</c>, then one row per entry of <c>ColumnValues</c>.
/// </summary>
/// <remarks>
/// Names and values are written as-is; no HTML escaping is applied.
/// </remarks>
/// <param name="Table">Source of the column names and row values.</param>
/// <returns>The table markup, one element per line.</returns>
public string GenrateHTMLTable(HTMLTable Table)
{
    StringBuilder markup = new StringBuilder();

    markup.Append("<table>\n<tr>\n");
    foreach (string heading in Table.ColumnNames)
    {
        markup.Append("<th>").Append(heading).Append("</th>\n");
    }
    markup.Append("</tr>\n");

    foreach (var record in Table.ColumnValues)
    {
        markup.Append("<tr>\n");
        foreach (string cellValue in record.Values)
        {
            markup.Append("<td>").Append(cellValue).Append("</td>\n");
        }
        markup.Append("</tr>\n");
    }

    markup.Append("</table>");
    return(markup.ToString());
}
/// <summary>
/// Counts the values that appear under a given set of column headers.
/// </summary>
/// <remarks>
/// The header is the row-1 cell with spaces removed; the value is the body cell after
/// <c>NormalizeTextResult()</c>. Empty headers and empty values are skipped. A value is
/// counted once for every entry of <paramref name="TitleKeyWord"/> that equals the header.
/// Counts accumulate in <c>TrainingValueResult</c>.
/// </remarks>
/// <param name="root">Root node whose <c>TableList</c> is scanned.</param>
/// <param name="TitleKeyWord">Header texts whose columns are of interest.</param>
public void PutValueTrainingItem(HTMLEngine.MyRootHtmlNode root, List <string> TitleKeyWord)
{
    foreach (var entry in root.TableList)
    {
        var grid = new HTMLTable(entry.Value);

        // Row 1 is the header, so scanning starts at row 2.
        for (int row = 2; row < grid.RowCount; row++)
        {
            for (int col = 1; col < grid.ColumnCount; col++)
            {
                var header = grid.CellValue(1, col).Replace(" ", "");
                if (String.IsNullOrEmpty(header))
                {
                    continue;
                }

                var cellText = grid.CellValue(row, col).NormalizeTextResult();
                if (string.IsNullOrEmpty(cellText))
                {
                    continue;
                }

                foreach (var wanted in TitleKeyWord)
                {
                    if (!header.Equals(wanted))
                    {
                        continue;
                    }

                    if (TrainingValueResult.TryGetValue(cellText, out var seen))
                    {
                        TrainingValueResult[cellText] = seen + 1;
                    }
                    else
                    {
                        TrainingValueResult.Add(cellText, 1);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Extracts contract records from tables using four header rules: purchaser (required),
/// winning bidder (required), project name and winning amount (both optional).
/// </summary>
/// <remarks>
/// Party and project names are passed through <c>NormalizeTextResult()</c>. When the bidder
/// equals a known short name that has a non-empty full name, the full name is used instead.
/// The formatted amount is stored as both the upper and the lower contract limit.
/// </remarks>
/// <returns>One <c>ContractRec</c> per table row found; empty when no table matches.</returns>
List <RecordBase> ExtractMultiFromTable()
{
    var records = new List <RecordBase>();

    // The position of a rule in this list is the index used to read the result row below.
    var rules = new List <TableSearchTitleRule>
    {
        new TableSearchTitleRule
        {
            Name = "甲方",
            Title = new List <string> { "采购人" },
            IsTitleEq = false,
            IsRequire = true
        },
        new TableSearchTitleRule
        {
            Name = "乙方",
            Title = new List <string> { "中标人" },
            IsTitleEq = false,
            IsRequire = true
        },
        new TableSearchTitleRule
        {
            Name = "项目名称",
            Title = new List <string> { "项目名称" },
            IsTitleEq = false,
            IsRequire = false
        },
        new TableSearchTitleRule
        {
            Name = "中标金额",
            Title = new List <string> { "中标金额" },
            IsTitleEq = false,
            IsRequire = false
        }
    };

    var opt = new SearchOption();
    opt.IsMeger = false;

    var result = HTMLTable.GetMultiInfoByTitleRules(root, rules, opt);
    if (result.Count == 0)
    {
        return(records);
    }

    Console.WriteLine("Table ExtractMulti ID:" + Id);
    foreach (var item in result)
    {
        var contract = new ContractRec();
        contract.Id = Id;
        contract.JiaFang = item[0].RawData.NormalizeTextResult();
        contract.YiFang = item[1].RawData.NormalizeTextResult();

        // Replace a known short name by its full name; keep searching past entries whose
        // full name is empty.
        foreach (var cn in companynamelist)
        {
            if (String.IsNullOrEmpty(cn.secShortName) || !cn.secShortName.Equals(contract.YiFang))
            {
                continue;
            }
            if (string.IsNullOrEmpty(cn.secFullName))
            {
                continue;
            }
            contract.YiFang = cn.secFullName;
            break;
        }

        contract.ProjectName = item[2].RawData.NormalizeTextResult();
        contract.ContractMoneyUpLimit = MoneyUtility.Format(item[3].RawData, item[3].Title);
        contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;
        records.Add(contract);
    }

    return(records);
}
/// <summary>
/// Determines the counterparties of a reorganization: table-listed counterparty names that
/// appear in a paragraph in the order "counterparty ... holds ... target", with the target
/// mention starting less than 20 positions after the counterparty mention.
/// </summary>
/// <remarks>
/// Side effects: sets <c>TradeCompany.IsRequire</c> to true, appends the located words to
/// <c>CustomerList</c>, and runs <c>nermap.Anlayze(this)</c>.
/// </remarks>
/// <param name="target">Record supplying the target company's full and short names.</param>
/// <returns>The distinct counterparty names; empty when no table matches the title rule.</returns>
public List <string> getTradeCompany(ReorganizationRec target)
{
    var rtn = new List <string>();

    TradeCompany.IsRequire = true;
    var Rules = new List <TableSearchTitleRule> { TradeCompany };
    var opt = new HTMLTable.SearchOption();
    opt.IsMeger = true;
    var result = HTMLTable.GetMultiInfoByTitleRules(root, Rules, opt);
    if (result.Count == 0)
    {
        return(rtn);
    }

    // "Counterparty" is also an entry of the explanation table and gets mistaken for a header
    // there, so rows from explanation tables are filtered out.
    // TODO: counterparties should only be taken from tables near the start of the document.
    var TableTrades = result.Where(z => !ExplainTableId.Contains(z[0].TableId))
                      .Select(x => x[0].RawData)
                      .Where(y => !y.Contains("不超过")).ToList();

    var TargetLoc = LocateProperty.LocateCustomerWord(root, new string[] { target.TargetCompanyFullName, target.TargetCompanyShortName }.ToList(), "标的");
    var HolderLoc = LocateProperty.LocateCustomerWord(root, new string[] { "持有", "所持" }.ToList(), "持有");
    var OwnerLoc = LocateProperty.LocateCustomerWord(root, TableTrades, "交易对手");
    CustomerList.AddRange(TargetLoc);
    CustomerList.AddRange(HolderLoc);
    CustomerList.AddRange(OwnerLoc);
    nermap.Anlayze(this);

    foreach (var nerlist in nermap.ParagraghlocateDict.Values)
    {
        // Look for the sequence: counterparty -> "holds" -> target, in text order.
        int OwnerIdx = -1;
        int HolderIdx = -1;
        int TargetIdx = -1;
        nerlist.CustomerList.Sort((x, y) => { return(x.StartIdx.CompareTo(y.StartIdx)); });
        var OwnerName = string.Empty;

        foreach (var ner in nerlist.CustomerList)
        {
            if (ner.Description == "交易对手")
            {
                OwnerIdx = ner.StartIdx;
                OwnerName = ner.Value;
            }
            // "holds" only counts once a counterparty has been seen.
            if (ner.Description == "持有" && OwnerIdx != -1)
            {
                HolderIdx = ner.StartIdx;
            }
            // The target only counts once both earlier parts have been seen.
            if (ner.Description == "标的" && OwnerIdx != -1 && HolderIdx != -1)
            {
                TargetIdx = ner.StartIdx;
            }

            if (OwnerIdx != -1 && HolderIdx != -1 && TargetIdx != -1)
            {
                if (TargetIdx - OwnerIdx < 20)
                {
                    rtn.Add(OwnerName);
                }
                // Sequence complete: start looking for the next one.
                OwnerIdx = -1;
                HolderIdx = -1;
                TargetIdx = -1;
            }
        }
    }

    return(rtn.Distinct().ToList());
}
/// <summary>
/// Queries allmusic.com for <paramref name="strAlbum"/> and fills <c>_albumList</c> with the
/// albums found.
/// </summary>
/// <remarks>
/// If the response parses directly as an album page, that single album is stored. Otherwise
/// the table preceding the marker <c>id="expansiontable1"</c> is parsed row by row (row 0 is
/// skipped): column 1 contributes a parenthesized suffix, column 2 the artist and column 4
/// the album title and link. The list is finally sorted with <c>AlbumSort</c>.
/// </remarks>
/// <param name="strAlbum">Album name to search for.</param>
/// <param name="artistName">Passed to the sorter.</param>
/// <param name="releaseYear">Passed to the sorter.</param>
/// <returns>False when the response is empty or contains no result table; true otherwise.</returns>
public bool FindAlbuminfo(string strAlbum, string artistName, int releaseYear)
{
    _albumList.Clear();

    // Request format:
    // http://www.allmusic.com/cg/amg.dll?P=amg&SQL=escapolygy&OPT1=2
    HTMLUtil util = new HTMLUtil();
    string postData = String.Format("P=amg&SQL={0}&OPT1=2", HttpUtility.UrlEncode(strAlbum));
    string html = PostHTTP("http://www.allmusic.com/cg/amg.dll", postData);
    if (html.Length == 0)
    {
        return(false);
    }

    // The response may already be a single album page.
    MusicAlbumInfo directHit = new MusicAlbumInfo();
    directHit.AlbumURL = "http://www.allmusic.com/cg/amg.dll?" + postData;
    if (directHit.Parse(html))
    {
        _albumList.Add(directHit);
        return(true);
    }

    // Otherwise locate the result table: find the marker, then back up to its <table tag.
    string htmlLow = html.ToLower();
    int startOfTable = htmlLow.IndexOf("id=\"expansiontable1\"");
    if (startOfTable < 0)
    {
        return(false);
    }
    startOfTable = htmlLow.LastIndexOf("<table", startOfTable);
    if (startOfTable < 0)
    {
        return(false);
    }

    HTMLTable table = new HTMLTable();
    table.Parse(html.Substring(startOfTable));

    for (int rowIndex = 1; rowIndex < table.Rows; ++rowIndex)
    {
        HTMLTable.HTMLRow row = table.GetRow(rowIndex);
        string albumName = "";
        string nameOfAlbum = "";
        string nameOfArtist = "";

        for (int iCol = 0; iCol < row.Columns; ++iCol)
        {
            string column = row.GetColumValue(iCol);
            switch (iCol)
            {
                case 1:
                {
                    if (column.Length != 0)
                    {
                        albumName = "(" + column + ")";
                    }
                    break;
                }

                case 2:
                {
                    nameOfArtist = column;
                    util.RemoveTags(ref nameOfArtist);
                    if (!column.Equals(" "))
                    {
                        albumName = String.Format("- {0} {1}", nameOfArtist, albumName);
                    }
                    break;
                }

                case 4:
                {
                    string tempAlbum = column;
                    util.RemoveTags(ref tempAlbum);
                    albumName = String.Format("{0} {1}", tempAlbum, albumName);
                    nameOfAlbum = tempAlbum;

                    int hrefStart = column.IndexOf("<a href=\"");
                    if (hrefStart < 0)
                    {
                        break;
                    }
                    hrefStart += "<a href=\"".Length;
                    int hrefEnd = column.IndexOf("\">", hrefStart);
                    if (hrefEnd < 0)
                    {
                        break;
                    }

                    if (nameOfAlbum.Length == 0)
                    {
                        nameOfAlbum = albumName;
                    }

                    // Full album url looks like:
                    // http://www.allmusic.com/cg/amg.dll?p=amg&token=&sql=10:66jieal64xs7
                    string url = column.Substring(hrefStart, hrefEnd - hrefStart);
                    string albumNameStripped;
                    string albumUrl = String.Format("http://www.allmusic.com{0}", url);

                    MusicAlbumInfo listed = new MusicAlbumInfo();
                    util.ConvertHTMLToAnsi(albumName, out albumNameStripped);
                    listed.Title2 = albumNameStripped;
                    listed.AlbumURL = util.ConvertHTMLToAnsi(albumUrl);
                    listed.Artist = util.ConvertHTMLToAnsi(nameOfArtist);
                    listed.Title = util.ConvertHTMLToAnsi(nameOfAlbum);
                    _albumList.Add(listed);
                    break;
                }
            }
        }
    }

    // Finally order the candidates.
    _albumList.Sort(new AlbumSort(strAlbum, artistName, releaseYear));
    return(true);
}
/// <summary>
/// Extracts shareholding-change records from tables using four header rules (holder name,
/// change period, average price, number of shares) and attaches the post-change holdings
/// returned by <c>GetHolderAfter</c>.
/// </summary>
/// <remarks>
/// Rows are processed in reverse order so that the post-change holding of a holder is
/// attached to the last table row of that holder only; each holding is handed out once.
/// Records without a holder name or an end date are dropped. Among records with the same
/// key, one lacking the post-change count is dropped when another one has it.
/// </remarks>
/// <param name="root">Root node handed to the table search and to <c>GetHolderAfter</c>.</param>
/// <param name="id">Identifier stamped on every record.</param>
/// <returns>The surviving records.</returns>
static List <struStockChange> ExtractFromTable(HTMLEngine.MyRootHtmlNode root, string id)
{
    var StockHolderRule = new TableSearchRule();
    StockHolderRule.Name = "股东全称";
    StockHolderRule.Rule = new string[] { "股东名称" }.ToList();
    StockHolderRule.IsEq = true;

    var ChangeDateRule = new TableSearchRule();
    ChangeDateRule.Name = "变动截止日期";
    ChangeDateRule.Rule = new string[] { "减持期间", "增持期间", "减持时间", "增持时间" }.ToList();
    ChangeDateRule.IsEq = false;
    ChangeDateRule.Normalize = Normalizer.NormailizeDate;

    var ChangePriceRule = new TableSearchRule();
    ChangePriceRule.Name = "变动价格";
    ChangePriceRule.Rule = new string[] { "减持均价", "增持均价" }.ToList();
    ChangePriceRule.IsEq = false;
    // Keep only the part of the price text that precedes the currency unit.
    ChangePriceRule.Normalize = (x, y) =>
    {
        if (x.Contains("元"))
        {
            return(Utility.GetStringBefore(x, "元"));
        }
        return(x);
    };

    var ChangeNumberRule = new TableSearchRule();
    ChangeNumberRule.Name = "变动数量";
    ChangeNumberRule.Rule = new string[] { "减持股数", "增持股数" }.ToList();
    ChangeNumberRule.IsEq = false;
    ChangeNumberRule.Normalize = Normalizer.NormalizerStockNumber;

    // The position of a rule in this list is the index used to read the result row below.
    var Rules = new List <TableSearchRule>();
    Rules.Add(StockHolderRule);
    Rules.Add(ChangeDateRule);
    Rules.Add(ChangePriceRule);
    Rules.Add(ChangeNumberRule);
    var result = HTMLTable.GetMultiInfo(root, Rules, false);

    // Post-change holdings belong on the last row of a holder, so walk the rows backwards.
    result.Reverse();

    // Built once: rebuilding this list per record would reset every Used flag and hand the
    // same holding to every row of the holder.
    var holderafterlist = GetHolderAfter(root);

    var stockchangelist = new List <struStockChange>();
    foreach (var rec in result)
    {
        var stockchange = new struStockChange();
        stockchange.id = id;
        var Name = NormalizeCompanyName(rec[0].RawData);
        stockchange.HolderFullName = Name.Item1;
        stockchange.HolderShortName = Name.Item2;
        stockchange.ChangeEndDate = rec[1].RawData;
        stockchange.ChangePrice = rec[2].RawData;
        stockchange.ChangeNumber = rec[3].RawData;

        // Practically every valid record has a holder name and an end date. Rejecting the rest
        // may lose a few genuine records without a date, but improves the overall result.
        // Done before matching so a rejected record cannot consume a holding.
        if (string.IsNullOrEmpty(stockchange.HolderFullName) || string.IsNullOrEmpty(stockchange.ChangeEndDate))
        {
            continue;
        }

        for (int i = 0; i < holderafterlist.Count; i++)
        {
            var after = holderafterlist[i];
            if (after.Used)
            {
                continue;
            }
            if (after.Name == stockchange.HolderFullName || after.Name == stockchange.HolderShortName)
            {
                stockchange.HoldNumberAfterChange = after.Count;
                stockchange.HoldPercentAfterChange = after.Percent;
                after.Used = true;
                // Store the flag back: 'after' may be a copy if struHoldAfter is a value type
                // (its declaration is not visible here).
                holderafterlist[i] = after;
                break;
            }
        }

        stockchangelist.Add(stockchange);
    }

    // Merge: among same-key records, blank the id of the one lacking the post-change count.
    for (int i = 0; i < stockchangelist.Count; i++)
    {
        var x = stockchangelist[i];
        for (int j = i + 1; j < stockchangelist.Count; j++)
        {
            var y = stockchangelist[j];
            if (x.GetKey() == y.GetKey())
            {
                if (string.IsNullOrEmpty(x.HoldNumberAfterChange) && !string.IsNullOrEmpty(y.HoldNumberAfterChange))
                {
                    x.id = "";
                }
                if (!string.IsNullOrEmpty(x.HoldNumberAfterChange) && string.IsNullOrEmpty(y.HoldNumberAfterChange))
                {
                    y.id = "";
                }
                // Store back for the same reason as above (possible value-type copy).
                stockchangelist[j] = y;
            }
        }
        stockchangelist[i] = x;
    }

    return(stockchangelist.Where((x) => { return !String.IsNullOrEmpty(x.id); }).ToList());
}