/// <summary>
/// Gets the function that reads, from a <see cref="DocumentSimilarityResultPair"/>, the score
/// designated by <paramref name="scoreSource"/>.
/// </summary>
/// <param name="scoreSource">The score source.</param>
/// <returns>
/// A selector returning <c>OverallSimilarity</c> for <c>combined</c>, <c>ContentSimilarity</c> for
/// <c>content</c>, the larger of <c>ContentSimilarity</c> and <c>StructureSimilarity</c> for
/// <c>higherComponent</c>, and <c>StructureSimilarity</c> for <c>structure</c> as well as for any
/// other value. The result is never <c>null</c>.
/// </returns>
public static Func<DocumentSimilarityResultPair, double> GetSelector(this DocumentSimilarityScoreEnum scoreSource)
{
    switch (scoreSource)
    {
        case DocumentSimilarityScoreEnum.combined:
            return x => x.OverallSimilarity;

        case DocumentSimilarityScoreEnum.content:
            return x => x.ContentSimilarity;

        case DocumentSimilarityScoreEnum.higherComponent:
            return x => Math.Max(x.ContentSimilarity, x.StructureSimilarity);

        // Values without a dedicated case deliberately share the structure selector.
        case DocumentSimilarityScoreEnum.structure:
        default:
            return x => x.StructureSimilarity;
    }
}
/// <summary>
/// Gets the list of score selectors that correspond to <paramref name="scoreSource"/>.
/// </summary>
/// <param name="scoreSource">The score source.</param>
/// <returns>
/// For <c>searchBestFit</c>: the selectors of <c>structure</c>, <c>higherComponent</c>,
/// <c>combined</c> and <c>content</c>, in that order. For every other value: a list holding the
/// single selector returned by <c>GetSelector</c> for that value.
/// </returns>
public static List<Func<DocumentSimilarityResultPair, double>> GetSelectorList(this DocumentSimilarityScoreEnum scoreSource)
{
    var selectors = new List<Func<DocumentSimilarityResultPair, double>>();

    if (scoreSource == DocumentSimilarityScoreEnum.searchBestFit)
    {
        // Order matters to callers that walk the list sequentially, so it is kept as-is.
        DocumentSimilarityScoreEnum[] candidates =
        {
            DocumentSimilarityScoreEnum.structure,
            DocumentSimilarityScoreEnum.higherComponent,
            DocumentSimilarityScoreEnum.combined,
            DocumentSimilarityScoreEnum.content
        };

        foreach (DocumentSimilarityScoreEnum candidate in candidates)
        {
            selectors.Add(candidate.GetSelector());
        }
    }
    else
    {
        selectors.Add(scoreSource.GetSelector());
    }

    return selectors;
}
/// <summary>
/// Builds a table of pairwise similarity scores for the documents in <paramref name="reportOn"/>.
/// The table is named after <paramref name="scoreSource"/> and has the columns "Label", one
/// numeric column per reported document (named by the document's label), "Url" and "Filepath".
/// Each row describes one reported document; its numeric cells hold the selected score between
/// that document and the document of the column.
/// </summary>
/// <param name="documentNodeDictionary">Source information (file path and URL) for each document node; must contain every reported node.</param>
/// <param name="scoreSource">Which score of a result pair is written into the cells; also used as the table name.</param>
/// <param name="reportOn">Documents to report on; when <c>null</c>, the result of <c>GetDocuments()</c> is used.</param>
/// <returns>The populated <see cref="DataTable"/>.</returns>
public DataTable PublishDataTable(Dictionary<HtmlNode, HtmlSourceAndUrl> documentNodeDictionary, DocumentSimilarityScoreEnum scoreSource, List<HtmlNode> reportOn = null)
{
    if (reportOn == null)
    {
        reportOn = GetDocuments();
    }

    Func<DocumentSimilarityResultPair, Double> scoreFunction = scoreSource.GetSelector();
    DataTable table = new DataTable(scoreSource.ToString());

    // NOTE(review): the return value was never used in this method; the call is retained only
    // in case it has side effects (e.g. lazy initialization) — TODO confirm and remove.
    GetLabeledDocuments();
    var LabelByDocument = GetLabelsByDocument();

    // Results of every reported document, keyed by the document itself.
    Dictionary<HtmlNode, Dictionary<HtmlNode, DocumentSimilarityResultPair>> matrix = new Dictionary<HtmlNode, Dictionary<HtmlNode, DocumentSimilarityResultPair>>();
    foreach (HtmlNode node in reportOn)
    {
        matrix.Add(node, GetResultsFor(node));
    }

    Dictionary<String, DataColumn> ColumnsByLabel = new Dictionary<string, DataColumn>();

    DataColumn labelColumn = table.Columns.Add("Label");

    // One score column per reported document.
    foreach (var pair in matrix)
    {
        var label = LabelByDocument[pair.Key];
        var source = documentNodeDictionary[pair.Key];

        var cn = table.Columns.Add(label, typeof(Double));
        cn.SetFormat("F3");
        cn.SetDesc("Similarity score with " + source.filepath);

        ColumnsByLabel.Add(label, cn);
    }

    DataColumn UrlColumn = table.Columns.Add("Url");
    DataColumn sourceColumn = table.Columns.Add("Filepath");

    // One row per reported document.
    foreach (var pair in matrix)
    {
        DataRow dr = table.NewRow();
        var source = documentNodeDictionary[pair.Key];

        dr[labelColumn] = LabelByDocument[pair.Key];
        dr[sourceColumn] = source.filepath;
        dr[UrlColumn] = source.url;

        foreach (var subPair in pair.Value)
        {
            // matrix is keyed by exactly the nodes of reportOn, so this hash lookup is
            // equivalent to scanning the reportOn list, without the linear cost per cell.
            if (matrix.ContainsKey(subPair.Key))
            {
                var dc = ColumnsByLabel[LabelByDocument[subPair.Key]];
                dr[dc] = scoreFunction(subPair.Value);
            }
        }

        table.Rows.Add(dr);
    }

    return table;
}