/// <summary>
/// Checks whether the author last names of a BibTeX record can be found in a PDF document.
/// </summary>
/// <param name="bibtex_item">The BibTeX record whose author field is used to build the query.</param>
/// <param name="pdf_document">The document that is searched.</param>
/// <param name="search_result_set">
/// The search hits when the method returns true; otherwise a fresh, empty result set.
/// </param>
/// <returns>True when the search for the quoted last names yields at least one result.</returns>
public static bool DoesBibTeXMatchDocument(BibTexItem bibtex_item, PDFDocument pdf_document, out PDFSearchResultSet search_result_set)
        {
            try
            {
                string author_field = BibTexTools.GetAuthor(bibtex_item);
                if (!String.IsNullOrEmpty(author_field))
                {
                    // Build a query made of every last name wrapped in double quotes, e.g. "Smith" "Jones"
                    StringBuilder query = new StringBuilder();
                    foreach (NameTools.Name author in NameTools.SplitAuthors(author_field, PDFDocument.UNKNOWN_AUTHORS))
                    {
                        query.Append(String.Format("\"{0}\" ", author.last_name));
                    }

                    if (query.Length > 0)
                    {
                        // NOTE(review): the literal 1 is presumably the page number to search - confirm against PDFSearcher.Search
                        search_result_set = PDFSearcher.Search(pdf_document, 1, query.ToString(), PDFSearcher.MATCH_CONTAINS);
                        if (search_result_set.Count > 0)
                        {
                            return true;
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Best effort: any failure while parsing or searching is treated as "no match" below
            }

            // No author, no hits, or an error: report failure with an empty result set
            search_result_set = new PDFSearchResultSet();
            return false;
        }
Пример #2
0
        /// <summary>
        /// Walk-through of the BibTex API: loads a .bib file, converts it to a DataTable, exports it to
        /// Excel (plain and extended report), and builds a BibTex entry and a typed table in code.
        /// Relies on the class members folderResources, folderResults, notation and log.
        /// </summary>
        public void UnitTestMethod()
        {
            // Example 1: construct the data file directly from a relative path
            BibTexDataFile direct_load = new BibTexDataFile("Resources\\test\\S0306457309000259.bib");

            // Example 2: locate the file inside the resources folder, then load it explicitly
            string bib_path = folderResources.findFile("S0306457309000259.bib", SearchOption.AllDirectories);

            BibTexDataFile bib_file = new BibTexDataFile();
            bib_file.Load(bib_path, log);

            // Convert the loaded entries into a DataTable
            DataTable entries_table = bib_file.ConvertToDataTable();

            // Plain Excel export of the table (the original author notes this one has no Legend spreadsheet)
            var excel_path = entries_table.serializeDataTable(Data.enums.reporting.dataTableExportEnum.excel, bib_file.name, folderResults, notation);

            // Extended Excel export (the original author notes it adds a Legend spreadsheet and other meta information)
            var extended_report = entries_table.GetReportAndSave(folderResults, notation);

            // Example 3: the short way - one high-level call from path to extended Excel report
            var short_way_report = BibTexTools.ExportToExcel(bib_path, notation, log);

            // Example 4: working with BibTexEntryModel - an entry created from code
            var sample_entry = new BibTexEntryModel
            {
                EntryKey  = "SOKOLOVA2009427",
                EntryType = "article",
                journal   = "Information Processing & Management",
                title     = "A systematic analysis of performance measures for classification tasks",
                keywords  = "Performance evaluation, Machine Learning, Text classification",
                year      = 2005,
                number    = 2,
                issn      = "0000-0000",
                @abstract = "Abs",
                doi       = "https://doi.org/10.1016/j.ipm.2009.03.002",
                url       = "http://www.sciencedirect.com/science/article/pii/S0306457309000259"
            };

            // Typed table created at run time.
            // NOTE(review): sample_entry is never added to this table within this method, so the report below
            // is produced from the table exactly as constructed - confirm whether that is intended.
            var runtime_table = new DataTableTypeExtended <BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

            // Extended Excel export of the run-time table
            var runtime_report = runtime_table.GetReportAndSave(folderResults, notation);
        }
        /// <summary>
        /// Reads one field from a BibTeX record, splits its value on ',' and ';' and appends the
        /// trimmed, non-empty pieces to <paramref name="tags"/>.
        /// </summary>
        /// <param name="bibtex">The BibTeX text to read the field from.</param>
        /// <param name="TAG">The name of the field to read.</param>
        /// <param name="tags">The list that receives the extracted tags; it is left untouched when the field is empty.</param>
        private static void ExtractTagsFromBibTeXField(string bibtex, string TAG, List <string> tags)
        {
            string vals = BibTexTools.GetField(bibtex, TAG);
            if (String.IsNullOrEmpty(vals))
            {
                return;
            }

            // RemoveEmptyEntries only drops zero-length pieces at split time. A whitespace-only piece
            // (e.g. the middle of "a, ,b") survives the split and turns into "" once trimmed, so the
            // emptiness filter has to run again after trimming.
            string[] pieces = vals.Split(new char[] { ',', ';' }, StringSplitOptions.RemoveEmptyEntries);
            tags.AddRange(pieces.Select(x => x.Trim()).Where(x => x.Length > 0));
        }
Пример #4
0
        /// <summary>
        /// Builds an "I'm reading ..." tweet for the PDF document bound to this control's DataContext.
        /// </summary>
        /// <returns>
        /// The tweet text, or null when the DataContext is not a bound PDF document, the BibTeX has no
        /// title or author, no author name can be split out, or the text is longer than 140 characters.
        /// </returns>
        private string CreatePaperTweet()
        {
            AugmentedBindable <PDFDocument> bindable = DataContext as AugmentedBindable <PDFDocument>;
            if (bindable == null)
            {
                return null;
            }

            BibTexItem item = bindable.Underlying.BibTexItem;

            // Both a title and an author are required to phrase the tweet
            if (!BibTexTools.HasTitle(item) || !BibTexTools.HasAuthor(item))
            {
                return null;
            }

            List <NameTools.Name> authors = NameTools.SplitAuthors(BibTexTools.GetAuthor(item));
            if (authors.Count == 0)
            {
                return null;
            }

            // Only the first author's last name is mentioned
            string paper_title       = BibTexTools.GetTitle(item);
            var    first_author_name = authors[0].last_name;
            string text              = String.Format("I'm reading {1}'s '{0}' with @Qiqqa http://qiqqa.com", paper_title, first_author_name);

            // Anything over the 140 character limit is rejected rather than truncated
            return (text.Length > 140) ? null : text;
        }
        /// <summary>
        /// Writes the BibTeX of every given document into a single text file, preceded by a generated
        /// comment header. Documents without BibTeX get a record synthesized from their combined title,
        /// authors and year.
        /// </summary>
        /// <param name="pdf_documents">The documents to export, in output order.</param>
        /// <param name="filename">Path of the BibTeX file to write; it is overwritten.</param>
        /// <param name="pdf_document_export_items">
        /// Export items looked up by document fingerprint; when an item exists its filename is used for the
        /// "filename"/"file" fields, otherwise the document's own path is used if the document exists.
        /// </param>
        /// <param name="include_additional_fields">
        /// When true, the filename, file, Tags, AutoTags and merged keywords fields are added to each record.
        /// </param>
        /// <remarks>
        /// A failure on one document is logged and that document is skipped; the remaining documents are still exported.
        /// </remarks>
        public static void ExportBibTeX(List <PDFDocument> pdf_documents, string filename, Dictionary <string, PDFDocumentExportItem> pdf_document_export_items, bool include_additional_fields)
        {
            Logging.Info("Exporting entries to BibTex");

            // Write out the header
            DateTime      now = DateTime.Now;
            StringBuilder sb  = new StringBuilder();

            sb.AppendLine("% -------------------------------------------------------------------------");
            sb.AppendLine(String.Format("% This BibTex file was generated by Qiqqa ({0}?ref=EXPBIB)", Common.Configuration.WebsiteAccess.Url_Documentation4Qiqqa));
            sb.AppendLine(String.Format("% {0} {1}", now.ToLongDateString(), now.ToLongTimeString()));
            sb.AppendLine("% Version 3");
            sb.AppendLine("% -------------------------------------------------------------------------");
            sb.AppendLine();

            // Write out the entries
            for (int i = 0; i < pdf_documents.Count; ++i)
            {
                PDFDocument pdf_document = pdf_documents[i];

                try
                {
                    StatusManager.Instance.UpdateStatus("BibTexExport", String.Format("Exporting entry {0} of {1}", i, pdf_documents.Count), i, pdf_documents.Count);

                    // Get the bibtex
                    string bibtex = pdf_document.BibTex;

                    // If there is no bibtex, make a record
                    if (String.IsNullOrEmpty(bibtex))
                    {
                        bibtex = BibTexTools.GetEmptyArticleBibTeXTemplate();

                        // The title goes into the "title" field. (It used to be passed to SetAuthor, which
                        // left the record without a title and put the title text into the author field.)
                        string title = pdf_document.TitleCombined;
                        if (Constants.TITLE_UNKNOWN != title)
                        {
                            bibtex = BibTexTools.SetField(bibtex, "title", title);
                        }

                        string author = pdf_document.AuthorsCombined;
                        if (Constants.UNKNOWN_AUTHORS != author)
                        {
                            bibtex = BibTexTools.SetAuthor(bibtex, author);
                        }

                        string year = pdf_document.YearCombined;
                        if (Constants.UNKNOWN_YEAR != year)
                        {
                            bibtex = BibTexTools.SetYear(bibtex, year);
                        }
                    }

                    // NB: The ADDITION of the filename and tags causes the bibtex to be reparsed and formatted,
                    // which currently loses all the nifty bibtex formatting language (e.g. double braces, etc)
                    //
                    // Once the bibtex parser is smarter, we can add this back in.  Or perhaps make it an option...
                    if (include_additional_fields)
                    {
                        // If we have an export filename, fill it in!
                        if (pdf_document_export_items.ContainsKey(pdf_document.Fingerprint))
                        {
                            string bibtex_filename = pdf_document_export_items[pdf_document.Fingerprint].filename;
                            string bibtex_file     = ConvertNormalFilenameToZoteroFilename(filename, bibtex_filename);

                            bibtex = BibTexTools.SetField(bibtex, "filename", bibtex_filename);
                            bibtex = BibTexTools.SetField(bibtex, "file", bibtex_file);
                        }
                        else
                        {
                            // No export item: fall back to the document's own location, if it has one
                            if (pdf_document.DocumentExists)
                            {
                                string bibtex_filename = pdf_document.DocumentPath;
                                string bibtex_file     = ConvertNormalFilenameToZoteroFilename(filename, bibtex_filename);

                                bibtex = BibTexTools.SetField(bibtex, "filename", bibtex_filename);
                                bibtex = BibTexTools.SetField(bibtex, "file", bibtex_file);
                            }
                        }

                        // Fill in the tags and autotags
                        {
                            string tags = pdf_document.Tags;
                            if (!String.IsNullOrEmpty(tags))
                            {
                                bibtex = BibTexTools.SetField(bibtex, "Tags", tags);
                            }

                            HashSet <string> autotags_set = pdf_document.Library.AITagManager.AITags.GetTagsWithDocument(pdf_document.Fingerprint);
                            string           autotags     = ArrayFormatter.ListElements(autotags_set.ToList(), ";");
                            if (!String.IsNullOrEmpty(autotags))
                            {
                                bibtex = BibTexTools.SetField(bibtex, "AutoTags", autotags);
                            }

                            // Merge existing keywords, tags and autotags into one ';'-separated keywords field
                            string keywords = BibTexTools.GetField(bibtex, "keywords");
                            if (!String.IsNullOrEmpty(tags))
                            {
                                if (!String.IsNullOrEmpty(keywords))
                                {
                                    keywords = keywords + ";";
                                }
                                keywords = keywords + tags;
                            }
                            if (!String.IsNullOrEmpty(autotags))
                            {
                                if (!String.IsNullOrEmpty(keywords))
                                {
                                    keywords = keywords + ";";
                                }
                                keywords = keywords + autotags;
                            }
                            if (!String.IsNullOrEmpty(keywords))
                            {
                                bibtex = BibTexTools.SetField(bibtex, "keywords", keywords);
                            }
                        }
                    }


                    // Append the bibtex
                    if (!String.IsNullOrEmpty(bibtex))
                    {
                        sb.AppendLine(bibtex);
                        sb.AppendLine();
                    }
                }

                catch (Exception ex)
                {
                    // One bad document must not abort the whole export
                    Logging.Error(ex, "There was a problem exporting the bibtex for " + pdf_document);
                }
            }

            // Write to disk
            File.WriteAllText(filename, sb.ToString());

            StatusManager.Instance.UpdateStatus("BibTexExport", String.Format("Exported your BibTex entries to {0}", filename));
        }
        /// <summary>
        /// Converts one EndNote record into a metadata import: a BibTeX record built from the record's
        /// fields, import tags, notes, keyword tags and - when one can be found on disk - the attached PDF.
        /// </summary>
        /// <param name="endnote_database_filename">Path of the EndNote database file the record came from.</param>
        /// <param name="record">The EndNote record to convert.</param>
        /// <returns>The populated import item; its filename is only set when an existing .pdf attachment was found.</returns>
        private static ImportingIntoLibrary.FilenameWithMetadataImport ConvertEndnoteToFilenameWithMetadataImport(string endnote_database_filename, MYDRecord record)
        {
            BibTexItem item = new BibTexItem();

            // "article" is the starting type; TransformType may replace it based on the reference type
            string entry_type = "article";
            TransformType(record.reference_type, ref entry_type);
            item.Type = entry_type;
            item.Key  = BibTexTools.GenerateRandomBibTeXKey();

            foreach (var field in record.fields)
            {
                string field_name  = field.Key;
                string field_value = field.Value;

                TransformKeyValue(record.reference_type, ref field_name, ref field_value);

                // Notes, keywords and attachments are handled separately further down, not stored in the BibTeX
                bool handled_separately = ("notes" == field_name) || ("keywords" == field_name) || ("link_to_pdf" == field_name);
                if (!handled_separately)
                {
                    item[field_name] = field_value;
                }
            }

            ImportingIntoLibrary.FilenameWithMetadataImport result = new ImportingIntoLibrary.FilenameWithMetadataImport();
            result.tags.Add("import_endnote");
            result.tags.Add("import_endnote_" + Path.GetFileNameWithoutExtension(endnote_database_filename));
            result.bibtex = item.ToBibTex();

            if (record.fields.ContainsKey("notes"))
            {
                result.notes = record.fields["notes"];
            }

            // Keywords become tags, one per whitespace/newline separated token
            if (record.fields.ContainsKey("keywords"))
            {
                char[] keyword_separators = new char[] { ' ', '\r', '\n' };
                result.tags.AddRange(record.fields["keywords"].Split(keyword_separators, StringSplitOptions.RemoveEmptyEntries));
            }

            // Handle the attachments
            if (record.fields.ContainsKey("link_to_pdf"))
            {
                string[] link_separators = new string[] { ",", "internal-pdf://", "\r", "\n" };
                string[] links           = record.fields["link_to_pdf"].Split(link_separators, StringSplitOptions.RemoveEmptyEntries);

                // The data directory sits next to the database file: the last four characters of the
                // database filename are cut off (NOTE(review): assumes a three-letter extension such as ".enl")
                string data_pdf_directory = endnote_database_filename.Substring(0, endnote_database_filename.Length - 4) + ".Data\\PDF\\";

                // Candidates in priority order: first each link inside the data directory, then each link as a raw path
                List <string> candidates = new List <string>();
                foreach (string link in links)
                {
                    candidates.Add(data_pdf_directory + link);
                }
                candidates.AddRange(links);

                // Use the first PDF file that exists in the file system
                foreach (string candidate in candidates)
                {
                    if (!candidate.ToLower().EndsWith(".pdf") || !File.Exists(candidate))
                    {
                        continue;
                    }

                    result.filename = candidate;
                    break;
                }
            }

            return result;
        }
        /// <summary>
        /// Looks for Mendeley Desktop SQLite databases in the current user's local application data folder
        /// and turns every document row found into a metadata import (BibTeX, tags, notes and, when the
        /// row carries a local URL, a file path).
        /// </summary>
        /// <returns>
        /// A details object holding the number of databases, documents and PDFs found together with the
        /// collected metadata imports. All counts are zero when the Mendeley folder does not exist.
        /// </returns>
        /// <remarks>
        /// Problems with a database file or with an individual document row are logged and skipped rather
        /// than propagated to the caller.
        /// </remarks>
        internal static MendeleyDatabaseDetails DetectMendeleyDatabaseDetails()
        {
            MendeleyDatabaseDetails mdd = new MendeleyDatabaseDetails();

            string BASE_DIR_FOR_MENDELEY_DATABASE = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + @"\Mendeley Ltd\Mendeley Desktop\";

            if (!Directory.Exists(BASE_DIR_FOR_MENDELEY_DATABASE))
            {
                Logging.Info("Mendeley not found.");
                mdd.databases_found = 0;
                mdd.documents_found = 0;
                mdd.pdfs_found      = 0;
                return(mdd);
            }

            try
            {
                string[] sqlite_filenames = Directory.GetFiles(BASE_DIR_FOR_MENDELEY_DATABASE, "*.sqlite", SearchOption.TopDirectoryOnly);
                foreach (string sqlite_filename in sqlite_filenames)
                {
                    // Skip the monitor database
                    if (sqlite_filename.EndsWith("monitor.sqlite"))
                    {
                        continue;
                    }

                    try
                    {
                        using (SQLiteConnection connection = new SQLiteConnection("Data Source=" + sqlite_filename))
                        {
                            connection.Open();

                            // Build the authors lookup: document id -> "Surname, Firstnames AND Surname, Firstnames ..."
                            Dictionary <long, string> authors_lookup = new Dictionary <long, string>();
                            {
                                string command_string = "SELECT * FROM DocumentContributors";
                                using (var command = new SQLiteCommand(command_string, connection))
                                // The reader is disposed explicitly so it is released before the next command runs
                                using (SQLiteDataReader reader = command.ExecuteReader())
                                {
                                    while (reader.Read())
                                    {
                                        long   document_id   = (long)reader["documentId"];
                                        string surname       = reader["lastName"] as string;
                                        string firstnames    = reader["firstNames"] as string;
                                        string compound_name = (String.IsNullOrEmpty(surname)) ? firstnames : (String.IsNullOrEmpty(firstnames) ? surname : (surname + ", " + firstnames));
                                        if (!String.IsNullOrEmpty(compound_name))
                                        {
                                            if (!authors_lookup.ContainsKey(document_id))
                                            {
                                                authors_lookup[document_id] = compound_name;
                                            }
                                            else
                                            {
                                                authors_lookup[document_id] = authors_lookup[document_id] + " AND " + compound_name;
                                            }
                                        }
                                    }
                                }
                            }

                            // Build the tags lookup: document id -> keywords
                            Dictionary <long, List <string> > tags_lookup = new Dictionary <long, List <string> >();
                            {
                                string command_string = "SELECT * FROM DocumentKeywords";
                                using (var command = new SQLiteCommand(command_string, connection))
                                using (SQLiteDataReader reader = command.ExecuteReader())
                                {
                                    while (reader.Read())
                                    {
                                        long   document_id = (long)reader["documentId"];
                                        string keyword     = reader["keyword"] as string;
                                        if (!String.IsNullOrEmpty(keyword))
                                        {
                                            if (!tags_lookup.ContainsKey(document_id))
                                            {
                                                tags_lookup[document_id] = new List <string>();
                                            }

                                            tags_lookup[document_id].Add(keyword);
                                        }
                                    }
                                }
                            }


                            // Get the bibtexes
                            {
                                //string command_string = "SELECT * FROM Documents WHERE 1=1 ";
                                string command_string =
                                    ""
                                    + "SELECT * "
                                    + "FROM Documents "
                                    + "LEFT OUTER JOIN DocumentFiles ON Documents.id == DocumentFiles.documentId "
                                    + "LEFT OUTER JOIN Files ON DocumentFiles.Hash = Files.Hash "
                                ;

                                using (var command = new SQLiteCommand(command_string, connection))
                                using (SQLiteDataReader reader = command.ExecuteReader())
                                {
                                    // The database only counts once its documents query has executed successfully
                                    ++mdd.databases_found;

                                    while (reader.Read())
                                    {
                                        try
                                        {
                                            BibTexItem bibtex_item = new BibTexItem();

                                            bibtex_item.Type = reader["type"] as string;
                                            bibtex_item.Key  = reader["citationKey"] as string;
                                            if (String.IsNullOrEmpty(bibtex_item.Key))
                                            {
                                                bibtex_item.Key = BibTexTools.GenerateRandomBibTeXKey();
                                            }

                                            PopulateTentativeField(bibtex_item, reader, "title");
                                            PopulateTentativeField(bibtex_item, reader, "abstract");
                                            PopulateTentativeField(bibtex_item, reader, "advisor");
                                            PopulateTentativeField(bibtex_item, reader, "city");
                                            PopulateTentativeField(bibtex_item, reader, "country");
                                            PopulateTentativeField(bibtex_item, reader, "day");
                                            PopulateTentativeField(bibtex_item, reader, "month");
                                            PopulateTentativeField(bibtex_item, reader, "dateAccessed", "accessed");
                                            PopulateTentativeField(bibtex_item, reader, "department");
                                            PopulateTentativeField(bibtex_item, reader, "doi");
                                            PopulateTentativeField(bibtex_item, reader, "edition");
                                            PopulateTentativeField(bibtex_item, reader, "institution");
                                            PopulateTentativeField(bibtex_item, reader, "isbn");
                                            PopulateTentativeField(bibtex_item, reader, "issn");
                                            PopulateTentativeField(bibtex_item, reader, "issue");
                                            PopulateTentativeField(bibtex_item, reader, "medium");
                                            PopulateTentativeField(bibtex_item, reader, "pages");
                                            PopulateTentativeField(bibtex_item, reader, "pmid");
                                            PopulateTentativeField(bibtex_item, reader, "publication");
                                            PopulateTentativeField(bibtex_item, reader, "publisher");
                                            PopulateTentativeField(bibtex_item, reader, "sections", "section");
                                            PopulateTentativeField(bibtex_item, reader, "series");
                                            PopulateTentativeField(bibtex_item, reader, "session");
                                            PopulateTentativeField(bibtex_item, reader, "volume");
                                            PopulateTentativeField(bibtex_item, reader, "year");

                                            long document_id = (long)reader["id"];
                                            if (authors_lookup.ContainsKey(document_id))
                                            {
                                                bibtex_item["author"] = authors_lookup[document_id];
                                            }

                                            ImportingIntoLibrary.FilenameWithMetadataImport fwmi = new ImportingIntoLibrary.FilenameWithMetadataImport();
                                            fwmi.tags.Add("import_mendeley");
                                            fwmi.bibtex = bibtex_item.ToBibTex();

                                            // Turn the stored file URL into a Windows-style path
                                            string filename = reader["localUrl"] as string;
                                            if (!String.IsNullOrEmpty(filename))
                                            {
                                                const string FILE_PREFIX = "file:///";
                                                if (filename.StartsWith(FILE_PREFIX))
                                                {
                                                    filename = filename.Substring(FILE_PREFIX.Length);
                                                }

                                                filename = Uri.UnescapeDataString(filename);
                                                filename = filename.Replace('/', '\\');

                                                fwmi.filename = filename;

                                                ++mdd.pdfs_found;
                                            }

                                            if (tags_lookup.ContainsKey(document_id))
                                            {
                                                fwmi.tags.AddRange(tags_lookup[document_id]);
                                            }

                                            // Strip the Mendeley markup tags from the note; line breaks become newlines
                                            string note = reader["note"] as string;
                                            if (!String.IsNullOrEmpty(note))
                                            {
                                                note = note.Replace("<m:italic>", "");
                                                note = note.Replace("</m:italic>", "");
                                                note = note.Replace("<m:bold>", "");
                                                note = note.Replace("</m:bold>", "");
                                                note = note.Replace("<m:note>", "");
                                                note = note.Replace("</m:note>", "");
                                                note = note.Replace("<m:underline>", "");
                                                note = note.Replace("</m:underline>", "");
                                                note = note.Replace("<m:right>", "");
                                                note = note.Replace("</m:right>", "");
                                                note = note.Replace("<m:center>", "");
                                                note = note.Replace("</m:center>", "");
                                                note = note.Replace("<m:linebreak/>", "\n");

                                                fwmi.notes = note;
                                            }

                                            mdd.metadata_imports.Add(fwmi);

                                            ++mdd.documents_found;
                                        }

                                        catch (Exception ex)
                                        {
                                            // A broken row is skipped; the remaining rows are still imported
                                            Logging.Error(ex, "Exception while extracting a Mendeley document.");
                                        }
                                    }
                                }
                            }
                        }
                    }

                    catch (Exception ex)
                    {
                        // A broken database file is skipped; the remaining files are still explored
                        Logging.Error(ex, "Exception while exploring for Mendeley instance in file '{0}'.", sqlite_filename);
                    }
                }
            }
            catch (Exception ex)
            {
                Logging.Error(ex, "Exception while exploring for Mendeley instances.");
            }

            Logging.Info("Got {0} libraries with {1} documents and {2} PDFs.", mdd.databases_found, mdd.documents_found, mdd.pdfs_found);

            return(mdd);
        }
        /// <summary>
        /// Tries to find BibTeX for a document by searching on its title, and assigns the longest
        /// candidate that matches both the document's content and its title.
        /// </summary>
        /// <param name="pdf_document">The document to find BibTeX for.</param>
        /// <param name="manual_override">
        /// When true, the backoff, existing-BibTeX, already-tried and configuration checks are bypassed.
        /// </param>
        /// <returns>True when a BibTeX record was assigned to the document; otherwise false.</returns>
        internal static bool InferBibTeX(PDFDocument pdf_document, bool manual_override)
        {
            // Preconditions, evaluated in order; all except "document exists" can be bypassed manually
            bool must_skip =
                (MustBackoff() && !manual_override)
                || !pdf_document.DocumentExists
                || (!String.IsNullOrEmpty(pdf_document.BibTex) && !manual_override)
                || (pdf_document.AutoSuggested_BibTeXSearch && !manual_override)
                || (!ConfigurationManager.Instance.ConfigurationRecord.Metadata_AutomaticallyAssociateBibTeX && !manual_override);
            if (must_skip)
            {
                return false;
            }

            // Flag on this document that we have tried to do the bibtex
            pdf_document.AutoSuggested_BibTeXSearch = true;
            pdf_document.Bindable.NotifyPropertyChanged(() => pdf_document.AutoSuggested_BibTeXSearch);

            string title = pdf_document.TitleCombined.Trim();

            // A usable title is non-empty, at least 10 characters long, has more than one word,
            // and is not one of the automatic placeholder titles
            bool title_unusable =
                String.IsNullOrEmpty(title)
                || title.Length < 10
                || title.IndexOf(' ') == -1
                || Constants.TITLE_UNKNOWN == title
                || pdf_document.DownloadLocation == title;
            if (title_unusable)
            {
                return false;
            }

            // Get the search results!
            string json = DoSearch(title);
            if (json == null)
            {
                return false;
            }

            JArray hits = (JArray)JsonConvert.DeserializeObject(json);

            // Collect the bibtexes that suit this document
            List <string> candidates = new List <string>();
            foreach (var hit in hits)
            {
                string candidate = hit["_source"]["bibtex"].ToString();
                if (String.IsNullOrEmpty(candidate))
                {
                    continue;
                }

                BibTexItem candidate_item = BibTexParser.ParseOne(candidate, true);

                // The candidate's authors must be found in the document
                PDFSearchResultSet author_hits;
                bool authors_match = BibTeXGoodnessOfFitEstimator.DoesBibTeXMatchDocument(candidate_item, pdf_document, out author_hits);
                if (!authors_match)
                {
                    continue;
                }

                // The candidate's title must be at least 75% similar to the document title (case-insensitive)
                string candidate_title_lower = BibTexTools.GetTitle(candidate_item).Trim().ToLower();
                string document_title_lower  = title.Trim().ToLower();
                if (StringTools.LewensteinSimilarity(document_title_lower, candidate_title_lower) < 0.75)
                {
                    continue;
                }

                // Mark the record as automatically found, unless it already carries the marker
                if (!candidate.Contains(BibTeXActionComments.AUTO_BIBTEXSEARCH))
                {
                    candidate = BibTeXActionComments.AUTO_BIBTEXSEARCH + "\r\n" + candidate;
                }

                candidates.Add(candidate);
            }

            if (candidates.Count == 0)
            {
                return false;
            }

            // Pick the longest matching bibtex: sort by descending length and take the first
            candidates.Sort((a, b) => b.Length.CompareTo(a.Length));

            pdf_document.BibTex = candidates[0];
            pdf_document.Bindable.NotifyPropertyChanged(() => pdf_document.BibTex);

            FeatureTrackingManager.Instance.UseFeature(Features.BibTeX_BibTeXSearchMatch);

            return true;
        }