From 265ef57f0f48b04f4517d4c03da0f9c2f43f615a Mon Sep 17 00:00:00 2001 From: Yann Herklotz Date: Sat, 9 Jul 2022 18:38:45 +0100 Subject: Add new blog-post --- content.org | 197 +++++++++++++++++++++++++++++++ static/docs/ebib-papers.el/index.html | 153 ++++++++++++++++++++++++ themes/ymherklotz/static/css/default.css | 8 +- 3 files changed, 353 insertions(+), 5 deletions(-) create mode 100644 static/docs/ebib-papers.el/index.html diff --git a/content.org b/content.org index 0a96c9b..6a88908 100644 --- a/content.org +++ b/content.org @@ -35,6 +35,203 @@ is located. Here you can find all my previous posts: +** Downloading academic papers automatically +:PROPERTIES: +:EXPORT_DATE: 2022-07-09 +:EXPORT_FILE_NAME: ebib-papers +:EXPORT_HUGO_SECTION: blog +:EXPORT_HUGO_CUSTOM_FRONT_MATTER: :summary "" +:CUSTOM_ID: ebib-papers +:END: + +I've been using [[http://joostkremers.github.io/ebib/][ebib]] as my bibliography manager for the last three years of my PhD, and have loved +how integrated it is into Emacs. Whether writing in org-mode, LaTeX or ConTeXt, I can get +autocompletion for all of my references from my main bibliography file, and insert native citation +commands for the language that I am currently writing in. It even supports creating a +sub-bibliography file containing only the references that are used by the current project, but is +linked to my main bibliography file so that changes propagate in both directions. It also has +powerful filtering options that make it easy to group and find related papers. However, the main +reason I wanted to learn it initially was because of the extensibility that is inherent to an +Emacs-based application. + +*** Automatic ID Generation + +The first useful feature that ebib provides is the automatic ID generation, which reuses the +internal BibTeX ID generation provided by Emacs (~bibtex-generate-autokey~). 
I had already used the +automatic ID generation with [[https://github.com/jkitchin/org-ref][org-ref]], and had changed the generation function slightly so that it +did not generate colons in the key name, and had already used it on my bibliography file. The +following is a great feature of Emacs and Lisp, which allows you to wrap an existing function with +more code so that this extra code gets executed every time the original function is called. In this +case it does a string replacement to remove any colons on the output of the original function. + +#+begin_src emacs-lisp +(advice-add 'bibtex-generate-autokey :around + (lambda (orig-func &rest args) + (replace-regexp-in-string ":" "" (apply orig-func args)))) +#+end_src + +As ebib reuses this function, my advice that I added around that function was automatically used by +all the automatic ID generation that ebib used and I therefore did not need to configure anything +else for it to behave properly for me. + +*** Automatic Paper Downloads + +Ebib allows for a lot of extra content to be stored together with your bibliography entry. It +handles this extra information nicely because it always uses the ID of the entry as a way to store +this extra information without having to create additional entries inside of the bib file. I use +this mainly to store notes associated to papers as well as store their PDF version. This allows me +to go to any entry in ebib and just press 'N' to view the associated notes, or 'f' to open the PDF +(inside of emacs of course). However, the latter assumes that you have manually downloaded the PDF +associated with that bib entry into the right folder and named it after the key of the entry in the +bib file. I used to do this manually, but it took quite a bit of work and seemed like something I +should automate. + +The first step is therefore just figuring out how to get the ID of the current entry when in the +ebib index buffer (the one that lists all the bib entries). 
I know of a function which can already +copy the key when hovering over the entry, which is bound to ~C k~, so we can have a look at what +function is executed when pressing these keys, using the lovely builtin src_emacs-lisp[:exports +code]{describe-key} function, and then at how this function is implemented by using +src_emacs-lisp[:exports code]{describe-function}, which also gives you the source code for the +function (which you can obviously modify as you want and reevaluate to change the behaviour at +runtime). We then find out that we can use the following function to retrieve the key of the entry: +src_emacs-lisp[:exports code]{ebib--get-key-at-point}. For example, if we want to create a function +that will check if a file exists for the current entry, we can write the following: + +#+begin_src emacs-lisp +(defun ebib-check-file () + "Check if current entry has a file associated with it." + (interactive) + (let ((key (ebib--get-key-at-point))) + (unless (file-exists-p (concat (car ebib-file-search-dirs) "/" key ".pdf")) + (error "[Ebib] No PDF found.")))) +#+end_src + +When executing this function in the ebib index buffer, we will get an error if the file is not +present, or nothing at all. src_emacs-lisp[:exports code]{ebib-file-search-dirs} in this case +contains a list of directories that should be searched for a file associated with the current entry +(and we only care about the first one in this case). + +Then, if the file is not present, we want to download the PDF, so we now want to write a simple +download function. Let's focus on getting papers from the [[https://dl.acm.org/][ACM]] first. In emacs we can download a +file from a URL using the src_emacs-lisp[:exports code]{url-copy-file} function, so all we need is +generate a URL to pass to that function. To do that we can check a few PDFs in the ACM and check +what the URL looks like. 
Luckily, it seems like it's based on the DOI for the paper, which should +be available in the bib entry, so we can write the following function: + +#+begin_src emacs-lisp +(defun acm-pdf-url (doi) + "Generate the URL for a paper from the ACM based on the DOI." + (concat "https://dl.acm.org/doi/pdf/" doi)) +#+end_src + +This of course assumes that you have access to the paper, either because it's open access or because +you have access through your university. We can then download it from there using the following: + +#+begin_src emacs-lisp +(defun download-pdf-from-doi (key doi) + "Download pdf from doi with KEY name." + (url-copy-file (acm-pdf-url doi) (concat (car ebib-file-search-dirs) "/" key ".pdf"))) +#+end_src + +And then wrap it in a top-level function which can then be called interactively, and will retrieve +all the important information from the current bib entry in ebib. + +#+begin_src emacs-lisp +(defun ebib-download-pdf-from-doi () + "Download a PDF for the current entry." + (interactive) + (let* ((key (ebib--get-key-at-point)) + (doi (ebib-get-field-value "doi" key ebib--cur-db 'noerror 'unbraced 'xref))) + (unless key (error "[Ebib] No key assigned to entry")) + (download-pdf-from-doi key doi))) +#+end_src + +As you can see, we can get values for arbitrary fields using the ~ebib-get-field-value~ function, +which I also found using the trick above concerning getting the key. + +This will only work with papers from the ACM, but we can easily add support for other publishers +such as [[https://www.springer.com/gb/][Springer]], [[https://ieeexplore.ieee.org/Xplore/home.jsp][IEEE]] and [[https://arxiv.org/][arXiv]]. This is mainly straightforward, except for the IEEE where I +needed to realise that in most cases they use the last few numbers of the DOI as their indexing +number, so I had to implement the function as follows: + +#+begin_src emacs-lisp +(defun ieee-pdf-url (doi) + "Retrieve a DOI pdf from the IEEE." 
+ (when (string-match "\\.\\([0-9]*\\)$" doi) + (let ((doi-bit (match-string 1 doi))) + (concat "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=" doi-bit "&ref=")))) +#+end_src + +ArXiv is also a bit special because it normally puts its own unique codes into the 'eprint' field. + +**** A More Robust Downloader + +We now have all these functions that can download PDFs from various sources, but we just need a way +to decide which URL to use. We could ask the user to choose when they want to download the PDF, but +I argue that there is normally enough information in the bib entry to automatically choose. The +final heuristic I came up with, which seems to mostly work well, is the following: + +#+begin_src emacs-lisp +(defun download-pdf-from-doi (key &optional doi publisher eprint journal organization url) + "Download pdf from DOI with KEY name." + (let ((pub (or publisher "")) + (epr (or eprint "")) + (jour (or journal "")) + (org (or organization "")) + (link (or url ""))) + (url-copy-file (cond + ((not doi) link) + ((or (string-match "ACM" (s-upcase pub)) + (string-match "association for computing machinery" (s-downcase pub))) + (acm-pdf-url doi)) + ((string-match "arxiv" (s-downcase pub)) + (arxiv-pdf-url epr)) + ((or (string-match "IEEE" (s-upcase pub)) + (string-match "IEEE" (s-upcase jour)) + (string-match "IEEE" (s-upcase org))) + (ieee-pdf-url doi)) + ((string-match "springer" (s-downcase pub)) + (springer-pdf-url doi)) + (t (error "Cannot possibly find the PDF any other way"))) + (concat (car ebib-file-search-dirs) "/" key ".pdf")))) +#+end_src + +It looks at the DOI, publisher, eprint, journal, organization and a URL. Then, it first checks if +it got a DOI, which if it didn't means that the URL should be used. Then, it checks if the +publisher is the ACM using different possible spellings, and if so uses the ACM link to download the +PDF. Then it checks if the publisher is arXiv, and uses the eprint entry to download it. 
IEEE is +the trickiest, as it can appear in various locations based on the conference or journal of the +original entry. We therefore check the publisher field, journal field and organization field. +Finally, we check if the publisher is Springer and download it from there. + +[[https://yannherklotz.com/docs/ebib-papers.el][The complete code is available.]] + +*** Automatic Syncing of Papers to a Remarkable Tablet + +Finally, reading papers on your laptop or on the desktop is not a great experience. I therefore got +myself a [[https://remarkable.com/][Remarkable tablet]], which has served me greatly for taking notes as well as reading papers. +The main selling point of the tablet is the extremely low latency for drawing and writing on the +tablet compared to other E Ink tablets. However, it also has a nifty feature which makes it ideal +to read papers even though it's essentially an A5 piece of paper. You can crop PDF margins which +make them much more readable without having to zoom in and move around the PDF, and this cropping +is consistent when turning pages as well as opening and closing the PDF. I also love that it runs +Linux instead of other tablets which usually run Android. + +However, one downside is that it has a pretty closed source ecosystem with respect to the +applications used for syncing files to the tablet. However, there is also a great community around +the Remarkable to counteract this, for example the great [[https://github.com/juruen/rmapi][rmapi]] tool which allows for downloading and +uploading files from the command-line, or the [[https://github.com/ax3l/lines-are-rusty][lines-are-rusty]] tool which produces SVG from +Remarkable lines files. 
+ +Therefore, we can use rmapi to sync all the files in my bibliography to the remarkable, by just +running: + +#+begin_src shell +ls *.pdf | xargs -n1 rmapi put +#+end_src + +which will try to upload all my files every time I call it, but nicely enough it fails quickly +whenever the file already exists on the Remarkable. ** TODO About the Promise of Performance from Formal Verification When one thinks about formal verification, one normally associates this with sacrificing performance diff --git a/static/docs/ebib-papers.el/index.html b/static/docs/ebib-papers.el/index.html new file mode 100644 index 0000000..689c764 --- /dev/null +++ b/static/docs/ebib-papers.el/index.html @@ -0,0 +1,153 @@ + + + + + random.el + + + +
+(defun acm-pdf-url (doi)
+  "Return the ACM Digital Library PDF URL for the paper identified by DOI.
+The URL is \"https://dl.acm.org/doi/pdf/\" followed by DOI; nothing is
+downloaded here."
+  (let ((prefix "https://dl.acm.org/doi/pdf/"))
+    (concat prefix doi)))
+
+(defun ieee-pdf-url (doi)
+  "Return the IEEE Xplore PDF URL derived from DOI, or nil.
+The run of digits following the last \".\" of DOI is used as the
+\"arnumber\" query parameter of the Xplore getPDF endpoint.  Return nil
+when DOI does not end in a \".\" followed by at least one digit, so that
+callers can detect that no URL could be built."
+  ;; `[0-9]+' rather than `[0-9]*': a DOI ending in a bare dot must not
+  ;; produce a URL with an empty arnumber.  The end-of-string anchor is
+  ;; used instead of `$', which would also match before a newline.
+  (when (string-match "\\.\\([0-9]+\\)\\'" doi)
+    (let ((doi-bit (match-string 1 doi)))
+      (concat "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=" doi-bit "&ref="))))
+
+(defun springer-pdf-url (doi)
+  "Return the SpringerLink PDF URL for the paper identified by DOI.
+The URL is \"https://link.springer.com/content/pdf/\", then DOI, then
+\".pdf\"; nothing is downloaded here."
+  (let ((base "https://link.springer.com/content/pdf/")
+        (suffix ".pdf"))
+    (concat base doi suffix)))
+
+(defun arxiv-pdf-url (eprint)
+  "Return the arXiv PDF URL for the paper with identifier EPRINT.
+The URL is \"https://arxiv.org/pdf/\", then EPRINT, then \".pdf\"; nothing
+is downloaded here."
+  (let ((base "https://arxiv.org/pdf/")
+        (suffix ".pdf"))
+    (concat base eprint suffix)))
+
+(defun download-pdf-from-doi (key &optional doi publisher eprint journal organization url)
+  "Download the PDF for entry KEY into the first of `ebib-file-search-dirs'.
+The source URL is chosen from the optional bibliography fields, in order:
+- with no DOI, URL is fetched directly;
+- PUBLISHER naming the ACM uses `acm-pdf-url' on DOI;
+- PUBLISHER mentioning arXiv, with a non-empty EPRINT, uses `arxiv-pdf-url';
+- PUBLISHER, JOURNAL or ORGANIZATION mentioning IEEE uses `ieee-pdf-url';
+- PUBLISHER mentioning Springer uses `springer-pdf-url'.
+The file is saved as KEY.pdf.  Signal an error when no rule applies or
+when the selected rule yields no usable URL."
+  (let* ((pub  (or publisher ""))
+         (epr  (or eprint ""))
+         (jour (or journal ""))
+         (org  (or organization ""))
+         (link (or url ""))
+         (source
+          (cond
+           ((not doi) link)
+           ;; Word boundaries stop "ACM" from matching inside publisher
+           ;; names such as "Macmillan".
+           ((or (string-match-p "\\bACM\\b" (upcase pub))
+                (string-match-p "association for computing machinery" (downcase pub)))
+            (acm-pdf-url doi))
+           ;; Without an eprint identifier no arXiv URL can be built, so
+           ;; fall through to the remaining rules.
+           ((and (string-match-p "arxiv" (downcase pub))
+                 (not (string= epr "")))
+            (arxiv-pdf-url epr))
+           ((or (string-match-p "IEEE" (upcase pub))
+                (string-match-p "IEEE" (upcase jour))
+                (string-match-p "IEEE" (upcase org)))
+            (ieee-pdf-url doi))
+           ((string-match-p "springer" (downcase pub))
+            (springer-pdf-url doi))
+           (t (error "Cannot possibly find the PDF any other way")))))
+    ;; `ieee-pdf-url' returns nil for DOIs it cannot parse, and URL may be
+    ;; absent; fail with a clear message instead of inside `url-copy-file'.
+    (when (or (null source) (string= source ""))
+      (error "[Ebib] Could not determine a PDF URL for %s" key))
+    (url-copy-file source (concat (car ebib-file-search-dirs) "/" key ".pdf"))))
+
+(defun download-pdf-from-link (link key)
+  "Fetch LINK with `url-copy-file' and save it as KEY.pdf.
+The destination directory is the first element of `ebib-file-search-dirs'."
+  (let ((target (concat (car ebib-file-search-dirs) "/" key ".pdf")))
+    (url-copy-file link target)))
+
+(defun download-pdf-from-downloads (key)
+  "Copy KEY.pdf from ~/Downloads/ into the first of `ebib-file-search-dirs'.
+`copy-file' is called with a non-nil OK-IF-ALREADY-EXISTS argument."
+  (let* ((pdf (concat key ".pdf"))
+         (from (concat "~/Downloads/" pdf))
+         (to (concat (car ebib-file-search-dirs) "/" pdf)))
+    (copy-file from to t)))
+
+(defun get-bib-from-doi (doi)
+  "Request the BibTeX entry for DOI from doi.org using curl.
+The request asks for \"application/x-bibtex\" and follows redirects.  The
+command is run through `shell-command', so its output is handled by
+that function rather than returned as a string."
+  ;; DOIs may legally contain shell metacharacters such as ( ) < > and ;
+  ;; so the URL is quoted before being spliced into the command line.
+  (shell-command (concat "curl -L -H \"Accept: application/x-bibtex; charset=utf-8\" "
+                         (shell-quote-argument (concat "https://doi.org/" doi)))))
+
+(defun ebib-download-pdf-from-doi ()
+  "Download a PDF for the current entry.
+Read the doi, publisher, eprinttype, eprint, journal, journaltitle,
+organization and url fields of the entry whose key is at point and pass
+them to `download-pdf-from-doi'.  eprinttype stands in for a missing
+publisher and journaltitle for a missing journal.  Signal an error when
+there is no key at point."
+  (interactive)
+  (let ((key (ebib--get-key-at-point)))
+    ;; Validate the key before it is used for any field lookup.
+    (unless key
+      (error "[Ebib] No key assigned to entry"))
+    (let* ((field (lambda (name)
+                    (ebib-get-field-value name key ebib--cur-db 'noerror 'unbraced 'xref)))
+           (doi (funcall field "doi"))
+           (publisher (funcall field "publisher"))
+           (eprinttype (funcall field "eprinttype"))
+           (eprint (funcall field "eprint"))
+           (journal (funcall field "journal"))
+           (journaltitle (funcall field "journaltitle"))
+           (organization (funcall field "organization"))
+           (url (funcall field "url")))
+      (download-pdf-from-doi key doi (or publisher eprinttype) eprint (or journal journaltitle) organization url))))
+
+(defun ebib-check-file ()
+  "Check whether the current entry has a PDF associated with it.
+Signal an error when no file named after the key at point, with a
+\".pdf\" extension, exists in the first directory of
+`ebib-file-search-dirs'; otherwise do nothing."
+  (interactive)
+  (let* ((key (ebib--get-key-at-point))
+         (pdf (concat (car ebib-file-search-dirs) "/" key ".pdf")))
+    (if (file-exists-p pdf)
+        nil
+      (error "[Ebib] No PDF found"))))
+
+ + diff --git a/themes/ymherklotz/static/css/default.css b/themes/ymherklotz/static/css/default.css index 601a56e..f9e7ef3 100644 --- a/themes/ymherklotz/static/css/default.css +++ b/themes/ymherklotz/static/css/default.css @@ -385,12 +385,10 @@ dl { } #TableOfContents { - line-height: 1; + line-height: 1.3; margin-left: 1em; -} - -#TableOfContents a { - font-size: 0.8em; + font-family: 'Iosevka Web', monospace; + font-size: 0.7em } #TableOfContents ul { -- cgit