/// <summary>
/// Searches a PDF document for the last names of the authors listed in a BibTeX record.
/// </summary>
/// <param name="bibtex_item">The BibTeX record whose author field supplies the names to look for.</param>
/// <param name="pdf_document">The document that is searched.</param>
/// <param name="search_result_set">
/// Receives the result set returned by the search when the method returns true;
/// otherwise receives a newly constructed <c>PDFSearchResultSet</c>.
/// </param>
/// <returns>
/// True when the search produced at least one result. False when there is no author
/// information, when nothing was found, or when any exception was thrown along the way.
/// </returns>
public static bool DoesBibTeXMatchDocument(BibTexItem bibtex_item, PDFDocument pdf_document, out PDFSearchResultSet search_result_set)
{
    PDFSearchResultSet hits = null;
    bool found_authors = false;

    try
    {
        string author_field = BibTexTools.GetAuthor(bibtex_item);
        if (!String.IsNullOrEmpty(author_field))
        {
            // Build a query made of every last name, each wrapped in double quotes.
            StringBuilder query_builder = new StringBuilder();
            foreach (NameTools.Name author in NameTools.SplitAuthors(author_field, PDFDocument.UNKNOWN_AUTHORS))
            {
                query_builder.AppendFormat("\"{0}\" ", author.last_name);
            }

            string query = query_builder.ToString();
            if (!String.IsNullOrEmpty(query))
            {
                // NOTE(review): the literal 1 is presumably the page number to search - confirm against PDFSearcher.Search.
                hits = PDFSearcher.Search(pdf_document, 1, query, PDFSearcher.MATCH_CONTAINS);
                found_authors = (0 < hits.Count);
            }
        }
    }
    catch (Exception)
    {
        // Any failure is treated the same as "no match": fall through to the negative result below.
    }

    if (found_authors)
    {
        search_result_set = hits;
        return true;
    }

    search_result_set = new PDFSearchResultSet();
    return false;
}
/// <summary>
/// Walk-through of the BibTex API: loading a .bib file, converting it to a DataTable,
/// exporting to Excel, and building a BibTex entry model in code.
/// </summary>
public void UnitTestMethod()
{
    // Example 1: Loading BibTex file
    var bibFromConstructor = new BibTexDataFile("Resources\\test\\S0306457309000259.bib");

    // Example 2: Loading BibTex file
    string bibPath = folderResources.findFile("S0306457309000259.bib", SearchOption.AllDirectories);

    // initializes bibtex data file object
    var bibFile = new BibTexDataFile();

    // loads .bib or .bibtex file from path specified
    bibFile.Load(bibPath, log);

    // converts loaded BibTex entries into DataTable, with all columns discovered in the entries
    DataTable entriesTable = bibFile.ConvertToDataTable();

    // saves DataTable to Excel file, without adding Legend spreadsheet
    var finalPath = entriesTable.serializeDataTable(Data.enums.reporting.dataTableExportEnum.excel, bibFile.name, folderResults, notation);

    // creates extended version of Excel file, with additional spreadsheet for Legend and other meta information
    var reportDataTable_ref = entriesTable.GetReportAndSave(folderResults, notation);

    // Example 3: Short way
    // High-level method, creates extended version of Excel file, with additional spreadsheet for Legend and other meta information
    var reportDataTable = BibTexTools.ExportToExcel(bibPath, notation, log);

    // Example 4: Working with BibTexEntryModel
    // Creation of BibTex entry from code
    var entry = new BibTexEntryModel()
    {
        EntryKey = "SOKOLOVA2009427",
        EntryType = "article",
        journal = "Information Processing & Management",
        title = "A systematic analysis of performance measures for classification tasks",
        keywords = "Performance evaluation, Machine Learning, Text classification",
        year = 2005,
        number = 2,
        issn = "0000-0000",
        @abstract = "Abs",
        doi = "https://doi.org/10.1016/j.ipm.2009.03.002",
        url = "http://www.sciencedirect.com/science/article/pii/S0306457309000259"
    };

    // Creation of data table collection
    // NOTE(review): the entry built above is never added to this table within this method - confirm whether that is intended.
    var runtimeTable = new DataTableTypeExtended<BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

    // creates extended version of Excel file, with additional spreadsheet for Legend and other meta information
    var codeDataTable_ref = runtimeTable.GetReportAndSave(folderResults, notation);
}
/// <summary>
/// Reads one field of a BibTeX record, splits its value on commas and semicolons,
/// and appends each trimmed, non-empty piece to <paramref name="tags"/>.
/// </summary>
/// <param name="bibtex">The BibTeX record text the field is read from.</param>
/// <param name="TAG">The name of the field to read.</param>
/// <param name="tags">The list that receives the extracted tags; nothing is added when the field is null or empty.</param>
private static void ExtractTagsFromBibTeXField(string bibtex, string TAG, List<string> tags)
{
    string vals = BibTexTools.GetField(bibtex, TAG);
    if (String.IsNullOrEmpty(vals))
    {
        return;
    }

    string[] pieces = vals.Split(new char[] { ',', ';' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string piece in pieces)
    {
        // RemoveEmptyEntries only drops zero-length pieces, so input such as "a, ,b"
        // still yields a whitespace-only piece; skip it instead of adding an empty tag.
        string tag = piece.Trim();
        if (0 < tag.Length)
        {
            tags.Add(tag);
        }
    }
}
/// <summary>
/// Builds the "I'm reading ..." tweet text for the document bound to <c>DataContext</c>.
/// </summary>
/// <returns>
/// The tweet text, or null when the data context is not an <c>AugmentedBindable&lt;PDFDocument&gt;</c>,
/// the BibTeX record lacks a title or an author, no author names could be split out,
/// or the resulting text is longer than 140 characters.
/// </returns>
private string CreatePaperTweet()
{
    AugmentedBindable<PDFDocument> bindable = DataContext as AugmentedBindable<PDFDocument>;
    if (null == bindable)
    {
        return null;
    }

    BibTexItem item = bindable.Underlying.BibTexItem;
    if (!BibTexTools.HasTitle(item) || !BibTexTools.HasAuthor(item))
    {
        return null;
    }

    List<NameTools.Name> authors = NameTools.SplitAuthors(BibTexTools.GetAuthor(item));
    if (0 == authors.Count)
    {
        return null;
    }

    // Only the first author's last name is mentioned in the tweet.
    string paper_title = BibTexTools.GetTitle(item);
    string first_author = authors[0].last_name;
    string tweet = String.Format("I'm reading {1}'s '{0}' with @Qiqqa http://qiqqa.com", paper_title, first_author);

    return (tweet.Length <= 140) ? tweet : null;
}
/// <summary>
/// Writes the BibTeX records of the given documents, preceded by a comment header, to a text file.
/// </summary>
/// <param name="pdf_documents">The documents to export, in output order.</param>
/// <param name="filename">The file the BibTeX text is written to.</param>
/// <param name="pdf_document_export_items">
/// Export items keyed by document fingerprint; when a document has one, its filename is used
/// for the "filename" and "file" fields instead of the document's own path.
/// </param>
/// <param name="include_additional_fields">
/// When true, the filename, file, Tags, AutoTags and merged keywords fields are added to each record.
/// </param>
/// <remarks>
/// A failure while exporting a single document is logged and that document is skipped;
/// the remaining documents are still exported.
/// </remarks>
public static void ExportBibTeX(List<PDFDocument> pdf_documents, string filename, Dictionary<string, PDFDocumentExportItem> pdf_document_export_items, bool include_additional_fields)
{
    Logging.Info("Exporting entries to BibTex");

    // Write out the header
    DateTime now = DateTime.Now;
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("% -------------------------------------------------------------------------");
    sb.AppendLine(String.Format("% This BibTex file was generated by Qiqqa ({0}?ref=EXPBIB)", Common.Configuration.WebsiteAccess.Url_Documentation4Qiqqa));
    sb.AppendLine(String.Format("% {0} {1}", now.ToLongDateString(), now.ToLongTimeString()));
    sb.AppendLine("% Version 3");
    sb.AppendLine("% -------------------------------------------------------------------------");
    sb.AppendLine();

    // Write out the entries
    for (int i = 0; i < pdf_documents.Count; ++i)
    {
        PDFDocument pdf_document = pdf_documents[i];

        try
        {
            // The message is one-based for the reader; the progress values stay zero-based as before.
            StatusManager.Instance.UpdateStatus("BibTexExport", String.Format("Exporting entry {0} of {1}", i + 1, pdf_documents.Count), i, pdf_documents.Count);

            // Get the bibtex
            string bibtex = pdf_document.BibTex;

            // If there is no bibtex, make a record
            if (String.IsNullOrEmpty(bibtex))
            {
                bibtex = BibTexTools.GetEmptyArticleBibTeXTemplate();

                // The title goes into the title field (it was previously written into the author field).
                string title = pdf_document.TitleCombined;
                if (Constants.TITLE_UNKNOWN != title)
                {
                    bibtex = BibTexTools.SetField(bibtex, "title", title);
                }

                string author = pdf_document.AuthorsCombined;
                if (Constants.UNKNOWN_AUTHORS != author)
                {
                    bibtex = BibTexTools.SetAuthor(bibtex, author);
                }

                string year = pdf_document.YearCombined;
                if (Constants.UNKNOWN_YEAR != year)
                {
                    bibtex = BibTexTools.SetYear(bibtex, year);
                }
            }

            // NB: The ADDITION of the filename and tags causes the bibtex to be reparsed and formatted,
            // which currently loses all the nifty bibtex formatting language (e.g. double braces, etc)
            //
            // Once the bibtex parser is smarter, we can add this back in. Or perhaps make it an option...
            if (include_additional_fields)
            {
                // If we have an export filename, fill it in! Otherwise fall back to the document's own path.
                bool have_filename = false;
                string bibtex_filename = null;
                if (pdf_document_export_items.ContainsKey(pdf_document.Fingerprint))
                {
                    bibtex_filename = pdf_document_export_items[pdf_document.Fingerprint].filename;
                    have_filename = true;
                }
                else if (pdf_document.DocumentExists)
                {
                    bibtex_filename = pdf_document.DocumentPath;
                    have_filename = true;
                }

                if (have_filename)
                {
                    string bibtex_file = ConvertNormalFilenameToZoteroFilename(filename, bibtex_filename);
                    bibtex = BibTexTools.SetField(bibtex, "filename", bibtex_filename);
                    bibtex = BibTexTools.SetField(bibtex, "file", bibtex_file);
                }

                // Fill in the tags and autotags
                string tags = pdf_document.Tags;
                if (!String.IsNullOrEmpty(tags))
                {
                    bibtex = BibTexTools.SetField(bibtex, "Tags", tags);
                }

                HashSet<string> autotags_set = pdf_document.Library.AITagManager.AITags.GetTagsWithDocument(pdf_document.Fingerprint);
                string autotags = ArrayFormatter.ListElements(autotags_set.ToList(), ";");
                if (!String.IsNullOrEmpty(autotags))
                {
                    bibtex = BibTexTools.SetField(bibtex, "AutoTags", autotags);
                }

                // Merge the tags and autotags into any existing keywords, separated by semicolons.
                string keywords = BibTexTools.GetField(bibtex, "keywords");
                if (!String.IsNullOrEmpty(tags))
                {
                    if (!String.IsNullOrEmpty(keywords))
                    {
                        keywords = keywords + ";";
                    }
                    keywords = keywords + tags;
                }
                if (!String.IsNullOrEmpty(autotags))
                {
                    if (!String.IsNullOrEmpty(keywords))
                    {
                        keywords = keywords + ";";
                    }
                    keywords = keywords + autotags;
                }
                if (!String.IsNullOrEmpty(keywords))
                {
                    bibtex = BibTexTools.SetField(bibtex, "keywords", keywords);
                }
            }

            // Append the bibtex
            if (!String.IsNullOrEmpty(bibtex))
            {
                sb.AppendLine(bibtex);
                sb.AppendLine();
            }
        }
        catch (Exception ex)
        {
            Logging.Error(ex, "There was a problem exporting the bibtex for " + pdf_document);
        }
    }

    // Write to disk
    File.WriteAllText(filename, sb.ToString());

    StatusManager.Instance.UpdateStatus("BibTexExport", String.Format("Exported your BibTex entries to {0}", filename));
}
/// <summary>
/// Converts one EndNote record into a <c>FilenameWithMetadataImport</c>: a BibTeX record built from
/// the record's fields, import tags, notes, keyword tags and (when one can be found) an attached PDF.
/// </summary>
/// <param name="endnote_database_filename">
/// Path of the EndNote database file; its name is used for an import tag and to locate the
/// accompanying ".Data\PDF\" attachment directory.
/// </param>
/// <param name="record">The EndNote record to convert.</param>
/// <returns>The populated import item. Its filename is only set when a candidate PDF exists on disk.</returns>
private static ImportingIntoLibrary.FilenameWithMetadataImport ConvertEndnoteToFilenameWithMetadataImport(string endnote_database_filename, MYDRecord record)
{
    BibTexItem bibtex_item = new BibTexItem();

    string type = "article";
    TransformType(record.reference_type, ref type);
    bibtex_item.Type = type;
    bibtex_item.Key = BibTexTools.GenerateRandomBibTeXKey();

    foreach (var pair in record.fields)
    {
        string key = pair.Key;
        string value = pair.Value;
        TransformKeyValue(record.reference_type, ref key, ref value);

        // These three fields are handled separately below and do not go into the BibTeX record.
        if ("notes" == key)
        {
            continue;
        }
        if ("keywords" == key)
        {
            continue;
        }
        if ("link_to_pdf" == key)
        {
            continue;
        }

        bibtex_item[key] = value;
    }

    ImportingIntoLibrary.FilenameWithMetadataImport fwmi = new ImportingIntoLibrary.FilenameWithMetadataImport();
    fwmi.tags.Add("import_endnote");
    fwmi.tags.Add("import_endnote_" + Path.GetFileNameWithoutExtension(endnote_database_filename));
    fwmi.bibtex = bibtex_item.ToBibTex();

    if (record.fields.ContainsKey("notes"))
    {
        fwmi.notes = record.fields["notes"];
    }

    if (record.fields.ContainsKey("keywords"))
    {
        string keywords = record.fields["keywords"];
        string[] tags = keywords.Split(new char[] { ' ', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        fwmi.tags.AddRange(tags);
    }

    // Handle the attachments
    if (record.fields.ContainsKey("link_to_pdf"))
    {
        string links_string = record.fields["link_to_pdf"];
        string[] links = links_string.Split(new string[] { ",", "internal-pdf://", "\r", "\n" }, StringSplitOptions.RemoveEmptyEntries);

        // Strip whatever extension the database file has (rather than assuming it is exactly
        // four characters long, which threw on short names and mis-trimmed other extensions).
        string base_directory = Path.ChangeExtension(endnote_database_filename, null) + ".Data\\PDF\\";

        // Build up the list of candidates
        List<string> pdf_links = new List<string>();

        // First candidates are those in the subdirectory corresponding to the .ENL file
        foreach (string link in links)
        {
            pdf_links.Add(base_directory + link);
        }

        // Second candidates are raw pathnames
        foreach (string link in links)
        {
            pdf_links.Add(link);
        }

        // Use the first PDF file that exists in the file system
        foreach (string pdf_link in pdf_links)
        {
            if (pdf_link.ToLower().EndsWith(".pdf") && File.Exists(pdf_link))
            {
                fwmi.filename = pdf_link;
                break;
            }
        }
    }

    return fwmi;
}
/// <summary>
/// Looks for Mendeley Desktop SQLite databases under the current user's local application data
/// folder and extracts, for every document row found, a BibTeX record, tags, notes and the
/// local file path.
/// </summary>
/// <returns>
/// A <c>MendeleyDatabaseDetails</c> holding the extracted imports together with counts of the
/// databases, documents and PDFs found. All counts are zero when the Mendeley directory does not exist.
/// </returns>
/// <remarks>
/// Failures are logged and not rethrown: a failure on one document skips that document,
/// a failure on one database file skips that file.
/// </remarks>
internal static MendeleyDatabaseDetails DetectMendeleyDatabaseDetails()
{
    MendeleyDatabaseDetails mdd = new MendeleyDatabaseDetails();

    string BASE_DIR_FOR_MENDELEY_DATABASE = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + @"\Mendeley Ltd\Mendeley Desktop\";
    if (!Directory.Exists(BASE_DIR_FOR_MENDELEY_DATABASE))
    {
        Logging.Info("Mendeley not found.");
        mdd.databases_found = 0;
        mdd.documents_found = 0;
        mdd.pdfs_found = 0;
        return mdd;
    }

    try
    {
        string[] sqlite_filenames = Directory.GetFiles(BASE_DIR_FOR_MENDELEY_DATABASE, "*.sqlite", SearchOption.TopDirectoryOnly);
        foreach (string sqlite_filename in sqlite_filenames)
        {
            // Skip the monitor database
            if (sqlite_filename.EndsWith("monitor.sqlite"))
            {
                continue;
            }

            try
            {
                using (SQLiteConnection connection = new SQLiteConnection("Data Source=" + sqlite_filename))
                {
                    connection.Open();

                    // Build the authors lookup: document id -> "surname, firstnames" joined by " AND "
                    Dictionary<long, string> authors_lookup = new Dictionary<long, string>();
                    {
                        string command_string = "SELECT * FROM DocumentContributors";
                        using (var command = new SQLiteCommand(command_string, connection))
                        // The reader is disposed explicitly so it does not outlive its command and connection.
                        using (SQLiteDataReader reader = command.ExecuteReader())
                        {
                            while (reader.Read())
                            {
                                long document_id = (long)reader["documentId"];
                                string surname = reader["lastName"] as string;
                                string firstnames = reader["firstNames"] as string;
                                string compound_name = (String.IsNullOrEmpty(surname)) ? firstnames : (String.IsNullOrEmpty(firstnames) ? surname : (surname + ", " + firstnames));
                                if (!String.IsNullOrEmpty(compound_name))
                                {
                                    if (!authors_lookup.ContainsKey(document_id))
                                    {
                                        authors_lookup[document_id] = compound_name;
                                    }
                                    else
                                    {
                                        authors_lookup[document_id] = authors_lookup[document_id] + " AND " + compound_name;
                                    }
                                }
                            }
                        }
                    }

                    // Build the tags lookup: document id -> keywords
                    Dictionary<long, List<string>> tags_lookup = new Dictionary<long, List<string>>();
                    {
                        string command_string = "SELECT * FROM DocumentKeywords";
                        using (var command = new SQLiteCommand(command_string, connection))
                        using (SQLiteDataReader reader = command.ExecuteReader())
                        {
                            while (reader.Read())
                            {
                                long document_id = (long)reader["documentId"];
                                string keyword = reader["keyword"] as string;
                                if (!String.IsNullOrEmpty(keyword))
                                {
                                    if (!tags_lookup.ContainsKey(document_id))
                                    {
                                        tags_lookup[document_id] = new List<string>();
                                    }
                                    tags_lookup[document_id].Add(keyword);
                                }
                            }
                        }
                    }

                    // Get the bibtexes
                    {
                        string command_string =
                            ""
                            + "SELECT * "
                            + "FROM Documents "
                            + "LEFT OUTER JOIN DocumentFiles ON Documents.id == DocumentFiles.documentId "
                            + "LEFT OUTER JOIN Files ON DocumentFiles.Hash = Files.Hash "
                            ;

                        using (var command = new SQLiteCommand(command_string, connection))
                        using (SQLiteDataReader reader = command.ExecuteReader())
                        {
                            // The database only counts once the documents query has executed.
                            ++mdd.databases_found;

                            while (reader.Read())
                            {
                                try
                                {
                                    BibTexItem bibtex_item = new BibTexItem();

                                    bibtex_item.Type = reader["type"] as string;
                                    bibtex_item.Key = reader["citationKey"] as string;
                                    if (String.IsNullOrEmpty(bibtex_item.Key))
                                    {
                                        bibtex_item.Key = BibTexTools.GenerateRandomBibTeXKey();
                                    }

                                    PopulateTentativeField(bibtex_item, reader, "title");
                                    PopulateTentativeField(bibtex_item, reader, "abstract");
                                    PopulateTentativeField(bibtex_item, reader, "advisor");
                                    PopulateTentativeField(bibtex_item, reader, "city");
                                    PopulateTentativeField(bibtex_item, reader, "country");
                                    PopulateTentativeField(bibtex_item, reader, "day");
                                    PopulateTentativeField(bibtex_item, reader, "month");
                                    PopulateTentativeField(bibtex_item, reader, "dateAccessed", "accessed");
                                    PopulateTentativeField(bibtex_item, reader, "department");
                                    PopulateTentativeField(bibtex_item, reader, "doi");
                                    PopulateTentativeField(bibtex_item, reader, "edition");
                                    PopulateTentativeField(bibtex_item, reader, "institution");
                                    PopulateTentativeField(bibtex_item, reader, "isbn");
                                    PopulateTentativeField(bibtex_item, reader, "issn");
                                    PopulateTentativeField(bibtex_item, reader, "issue");
                                    PopulateTentativeField(bibtex_item, reader, "medium");
                                    PopulateTentativeField(bibtex_item, reader, "pages");
                                    PopulateTentativeField(bibtex_item, reader, "pmid");
                                    PopulateTentativeField(bibtex_item, reader, "publication");
                                    PopulateTentativeField(bibtex_item, reader, "publisher");
                                    PopulateTentativeField(bibtex_item, reader, "sections", "section");
                                    PopulateTentativeField(bibtex_item, reader, "series");
                                    PopulateTentativeField(bibtex_item, reader, "session");
                                    PopulateTentativeField(bibtex_item, reader, "volume");
                                    PopulateTentativeField(bibtex_item, reader, "year");

                                    long document_id = (long)reader["id"];
                                    if (authors_lookup.ContainsKey(document_id))
                                    {
                                        bibtex_item["author"] = authors_lookup[document_id];
                                    }

                                    ImportingIntoLibrary.FilenameWithMetadataImport fwmi = new ImportingIntoLibrary.FilenameWithMetadataImport();
                                    fwmi.tags.Add("import_mendeley");
                                    fwmi.bibtex = bibtex_item.ToBibTex();

                                    // Turn the stored file URL into a Windows-style path.
                                    string filename = reader["localUrl"] as string;
                                    if (!String.IsNullOrEmpty(filename))
                                    {
                                        const string FILE_PREFIX = "file:///";
                                        if (filename.StartsWith(FILE_PREFIX))
                                        {
                                            filename = filename.Substring(FILE_PREFIX.Length);
                                        }

                                        filename = Uri.UnescapeDataString(filename);
                                        filename = filename.Replace('/', '\\');

                                        fwmi.filename = filename;

                                        ++mdd.pdfs_found;
                                    }

                                    if (tags_lookup.ContainsKey(document_id))
                                    {
                                        fwmi.tags.AddRange(tags_lookup[document_id]);
                                    }

                                    string note = reader["note"] as string;
                                    if (!String.IsNullOrEmpty(note))
                                    {
                                        // Strip the note markup tags, in this fixed order, then turn line breaks into newlines.
                                        string[] markup_to_strip = new string[]
                                        {
                                            "<m:italic>", "</m:italic>",
                                            "<m:bold>", "</m:bold>",
                                            "<m:note>", "</m:note>",
                                            "<m:underline>", "</m:underline>",
                                            "<m:right>", "</m:right>",
                                            "<m:center>", "</m:center>"
                                        };
                                        foreach (string markup in markup_to_strip)
                                        {
                                            note = note.Replace(markup, "");
                                        }
                                        note = note.Replace("<m:linebreak/>", "\n");

                                        fwmi.notes = note;
                                    }

                                    mdd.metadata_imports.Add(fwmi);

                                    ++mdd.documents_found;
                                }
                                catch (Exception ex)
                                {
                                    Logging.Error(ex, "Exception while extracting a Mendeley document.");
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "Exception while exploring for Mendeley instance in file '{0}'.", sqlite_filename);
            }
        }
    }
    catch (Exception ex)
    {
        Logging.Error(ex, "Exception while exploring for Mendeley instances.");
    }

    Logging.Info("Got {0} libraries with {1} documents and {2} PDFs.", mdd.databases_found, mdd.documents_found, mdd.pdfs_found);

    return mdd;
}
/// <summary>
/// Tries to find a BibTeX record for a document by searching on its title, and attaches the
/// longest acceptable candidate to the document.
/// </summary>
/// <param name="pdf_document">The document to find BibTeX for.</param>
/// <param name="manual_override">
/// When true, the backoff, existing-BibTeX, already-tried and configuration checks are bypassed.
/// </param>
/// <returns>True when a BibTeX record was attached to the document; false otherwise.</returns>
internal static bool InferBibTeX(PDFDocument pdf_document, bool manual_override)
{
    // Preconditions - all but the document-exists check can be bypassed by a manual override.
    if (MustBackoff() && !manual_override)
    {
        return false;
    }
    if (!pdf_document.DocumentExists)
    {
        return false;
    }
    if (!String.IsNullOrEmpty(pdf_document.BibTex) && !manual_override)
    {
        return false;
    }
    if (pdf_document.AutoSuggested_BibTeXSearch && !manual_override)
    {
        return false;
    }
    if (!ConfigurationManager.Instance.ConfigurationRecord.Metadata_AutomaticallyAssociateBibTeX && !manual_override)
    {
        return false;
    }

    // Flag on this document that we have tried to do the bibtex
    pdf_document.AutoSuggested_BibTeXSearch = true;
    pdf_document.Bindable.NotifyPropertyChanged(() => pdf_document.AutoSuggested_BibTeXSearch);

    // Reject titles that are empty, shorter than 10 characters, a single word,
    // or one of the unwanted automatic titles.
    string title = pdf_document.TitleCombined.Trim();
    if (String.IsNullOrEmpty(title)
        || title.Length < 10
        || title.IndexOf(' ') < 0
        || Constants.TITLE_UNKNOWN == title
        || pdf_document.DownloadLocation == title)
    {
        return false;
    }

    // Get the search results!
    string json = DoSearch(title);
    if (null == json)
    {
        return false;
    }

    JArray hits = (JArray)JsonConvert.DeserializeObject(json);

    // Get the bibtexes that suit this document
    List<string> accepted = new List<string>();
    foreach (var hit in hits)
    {
        string candidate_bibtex = hit["_source"]["bibtex"].ToString();
        if (String.IsNullOrEmpty(candidate_bibtex))
        {
            continue;
        }

        BibTexItem candidate_item = BibTexParser.ParseOne(candidate_bibtex, true);

        // Does the bibtex match sufficiently? Empty bibtex will be handled accordingly: no fit/match
        PDFSearchResultSet unused_results;
        bool authors_match = BibTeXGoodnessOfFitEstimator.DoesBibTeXMatchDocument(candidate_item, pdf_document, out unused_results);
        if (!authors_match)
        {
            continue;
        }

        // Does the title match sufficiently to the bibtex
        string candidate_title_lower = BibTexTools.GetTitle(candidate_item).Trim().ToLower();
        string document_title_lower = title.Trim().ToLower();
        double similarity = StringTools.LewensteinSimilarity(document_title_lower, candidate_title_lower);
        if (similarity < 0.75)
        {
            continue;
        }

        // Mark the record as automatically found, unless it already carries the marker.
        if (!candidate_bibtex.Contains(BibTeXActionComments.AUTO_BIBTEXSEARCH))
        {
            candidate_bibtex = BibTeXActionComments.AUTO_BIBTEXSEARCH + "\r\n" + candidate_bibtex;
        }

        // If we get this far, we are happy with the bibtex
        accepted.Add(candidate_bibtex);
    }

    if (0 == accepted.Count)
    {
        return false;
    }

    // Pick the longest matching bibtex (sort by descending length, take the first)
    accepted.Sort((a, b) => b.Length.CompareTo(a.Length));

    pdf_document.BibTex = accepted[0];
    pdf_document.Bindable.NotifyPropertyChanged(() => pdf_document.BibTex);

    FeatureTrackingManager.Instance.UseFeature(Features.BibTeX_BibTeXSearchMatch);

    return true;
}