Client for Various 'CrossRef' 'APIs'

Client for various 'CrossRef' 'APIs', including 'metadata' search with their old and newer search 'APIs', get 'citations' in various formats (including 'bibtex', 'citeproc-json', 'rdf-xml', etc.), convert 'DOIs' to 'PMIDs', and 'vice versa', get citations for 'DOIs', and get links to full text of articles when available.


R interface to various CrossRef APIs

Stable version from CRAN

install.packages("rcrossref")

Or development version from GitHub

install.packages("devtools")
devtools::install_github("ropensci/rcrossref")

Load rcrossref

library('rcrossref')

Use CrossRef's DOI Content Negotiation service, where you can get citations back in various formats, including apa

cr_cn(dois = "10.1126/science.169.3946.635", format = "text", style = "apa")
#> [1] "Frank, H. S. (1970). The Structure of Ordinary Water: New data and interpretations are yielding new insights into this fascinating substance. Science, 169(3946), 635–641. doi:10.1126/science.169.3946.635"

bibtex

cat(cr_cn(dois = "10.1126/science.169.3946.635", format = "bibtex"))
#> @article{Frank_1970,
#>  doi = {10.1126/science.169.3946.635},
#>  url = {http://dx.doi.org/10.1126/science.169.3946.635},
#>  year = 1970,
#>  month = {aug},
#>  publisher = {American Association for the Advancement of Science ({AAAS})},
#>  volume = {169},
#>  number = {3946},
#>  pages = {635--641},
#>  author = {H. S. Frank},
#>  title = {The Structure of Ordinary Water: New data and interpretations are yielding new insights into this fascinating substance},
#>  journal = {Science}
#> }

bibentry

cr_cn(dois = "10.6084/m9.figshare.97218", format = "bibentry")
#> Boettiger C (2012). "Regime shifts in ecology and evolution (PhD
#> Dissertation)." doi: 10.6084/m9.figshare.97218 (URL:
#> http://doi.org/10.6084/m9.figshare.97218), <URL:
#> https://doi.org/10.6084/m9.figshare.97218>.

Citation count, using OpenURL

cr_citation_count(doi = "10.1371/journal.pone.0042793")
#> [1] 10

The following functions all use the CrossRef API.

cr_funders(query = "NSF")
#> $meta
#>   total_results search_terms start_index items_per_page
#> 1             8          NSF           0             20
#> 
#> $data
#> # A tibble: 8 × 6
#>             id      location
#>          <chr>         <chr>
#> 1    100003187 United States
#> 2    100008367       Denmark
#> 3 501100004190        Norway
#> 4    100000179 United States
#> 5    100006445 United States
#> 6 501100000930     Australia
#> 7    100000001 United States
#> 8 501100001809         China
#> # ... with 4 more variables: name <chr>, alt.names <chr>, uri <chr>,
#> #   tokens <chr>
cr_agency(dois = '10.13039/100000001')
#> $DOI
#> [1] "10.13039/100000001"
#> 
#> $agency
#> $agency$id
#> [1] "crossref"
#> 
#> $agency$label
#> [1] "CrossRef"
cr_works(filter = c(has_orcid = TRUE, from_pub_date = '2004-04-04'), limit = 1)
#> $meta
#>   total_results search_terms start_index items_per_page
#> 1        419228           NA           0              1
#> 
#> $data
#> # A tibble: 1 × 24
#>   alternative.id container.title    created  deposited
#>            <chr>           <chr>      <chr>      <chr>
#> 1                                2015-11-11 2015-11-11
#> # ... with 20 more variables: DOI <chr>, funder <list>, indexed <chr>,
#> #   ISBN <chr>, ISSN <chr>, issued <chr>, link <list>, member <chr>,
#> #   prefix <chr>, publisher <chr>, reference.count <chr>, score <chr>,
#> #   source <chr>, subject <chr>, title <chr>, type <chr>, URL <chr>,
#> #   assertion <list>, author <list>, `clinical-trial-number` <list>
#> 
#> $facets
#> NULL
cr_journals(issn = c('1803-2427','2326-4225'))
#> # A tibble: 2 × 16
#>   alternative.id container.title created deposited funder indexed  ISBN
#>            <chr>           <chr>   <chr>     <chr> <list>   <chr> <chr>
#> 1                                                  <NULL>              
#> 2                                                  <NULL>              
#> # ... with 9 more variables: ISSN <chr>, issued <chr>, link <list>,
#> #   publisher <chr>, subject <chr>, title <chr>, assertion <list>,
#> #   author <list>, `clinical-trial-number` <list>
cr_licenses(query = 'elsevier')
#> $meta
#>   total_results search_terms start_index items_per_page
#> 1            13     elsevier           0             20
#> 
#> $data
#> # A tibble: 13 × 2
#>                                                                            URL
#>                                                                          <chr>
#> 1                            http://creativecommons.org/licenses/by-nc-nd/3.0/
#> 2                            http://creativecommons.org/licenses/by-nc-nd/4.0/
#> 3                               http://creativecommons.org/licenses/by-nc/4.0/
#> 4                                  http://creativecommons.org/licenses/by/3.0/
#> 5                                  http://creativecommons.org/licenses/by/4.0/
#> 6                                   http://doi.wiley.com/10.1002/tdm_license_1
#> 7                            http://onlinelibrary.wiley.com/termsAndConditions
#> 8         http://www.acm.org/publications/policies/copyright_policy#Background
#> 9                         http://www.elsevier.com/open-access/userlicense/1.0/
#> 10                                http://www.elsevier.com/tdm/userlicense/1.0/
#> 11                                                 http://www.springer.com/tdm
#> 12 © 2007 Elsevier Masson SAS. All rights reserved. The patient figure in Figu
#> 13 © 2012, Elsevier Inc., All Rights Reserved. Figure 8, part (B) (images of H
#> # ... with 1 more variables: work.count <int>
cr_prefixes(prefixes = c('10.1016','10.1371','10.1023','10.4176','10.1093'))
#> $meta
#> NULL
#> 
#> $data
#>                               member                             name
#> 1   http://id.crossref.org/member/78                      Elsevier BV
#> 2  http://id.crossref.org/member/340 Public Library of Science (PLoS)
#> 3  http://id.crossref.org/member/297                  Springer Nature
#> 4 http://id.crossref.org/member/1989             Co-Action Publishing
#> 5  http://id.crossref.org/member/286    Oxford University Press (OUP)
#>                                  prefix
#> 1 http://id.crossref.org/prefix/10.1016
#> 2 http://id.crossref.org/prefix/10.1371
#> 3 http://id.crossref.org/prefix/10.1023
#> 4 http://id.crossref.org/prefix/10.4176
#> 5 http://id.crossref.org/prefix/10.1093
#> 
#> $facets
#> list()
cr_members(query = 'ecology', limit = 5)
#> $meta
#>   total_results search_terms start_index items_per_page
#> 1            18      ecology           0              5
#> 
#> $data
#> # A tibble: 5 × 48
#>      id
#>   <int>
#> 1  7052
#> 2  6933
#> 3  7278
#> 4  7745
#> 5  9167
#> # ... with 47 more variables: primary_name <chr>, location <chr>,
#> #   last_status_check_time <date>, backfile.dois <chr>,
#> #   current.dois <chr>, total.dois <chr>, prefixes <chr>,
#> #   coverge.affiliations.current <chr>, coverge.funders.backfile <chr>,
#> #   coverge.licenses.backfile <chr>, coverge.funders.current <chr>,
#> #   coverge.affiliations.backfile <chr>,
#> #   coverge.resource.links.backfile <chr>, coverge.orcids.backfile <chr>,
#> #   coverge.update.policies.current <chr>, coverge.orcids.current <chr>,
#> #   coverge.references.backfile <chr>,
#> #   coverge.award.numbers.backfile <chr>,
#> #   coverge.update.policies.backfile <chr>,
#> #   coverge.licenses.current <chr>, coverge.award.numbers.current <chr>,
#> #   coverge.abstracts.backfile <chr>,
#> #   coverge.resource.links.current <chr>, coverge.abstracts.current <chr>,
#> #   coverge.references.current <chr>,
#> #   flags.deposits.abstracts.current <chr>,
#> #   flags.deposits.orcids.current <chr>, flags.deposits <chr>,
#> #   flags.deposits.affiliations.backfile <chr>,
#> #   flags.deposits.update.policies.backfile <chr>,
#> #   flags.deposits.award.numbers.current <chr>,
#> #   flags.deposits.resource.links.current <chr>,
#> #   flags.deposits.articles <chr>,
#> #   flags.deposits.affiliations.current <chr>,
#> #   flags.deposits.funders.current <chr>,
#> #   flags.deposits.references.backfile <chr>,
#> #   flags.deposits.abstracts.backfile <chr>,
#> #   flags.deposits.licenses.backfile <chr>,
#> #   flags.deposits.award.numbers.backfile <chr>,
#> #   flags.deposits.references.current <chr>,
#> #   flags.deposits.resource.links.backfile <chr>,
#> #   flags.deposits.orcids.backfile <chr>,
#> #   flags.deposits.funders.backfile <chr>,
#> #   flags.deposits.update.policies.current <chr>,
#> #   flags.deposits.licenses.current <chr>, names <chr>, tokens <chr>
#> 
#> $facets
#> NULL

cr_r() uses the function cr_works() internally.

cr_r()
#>  [1] "10.1039/ct8885300641"                
#>  [2] "10.1063/1.1709474"                   
#>  [3] "10.1111/j.1708-8305.1996.tb00750.x"  
#>  [4] "10.1037//1076-898x.2.3.270"          
#>  [5] "10.4337/9781783471171.00028"         
#>  [6] "10.1007/978-1-349-07777-9_4"         
#>  [7] "10.1111/b.9781405116749.2003.00054.x"
#>  [8] "10.1016/j.crvi.2004.09.010"          
#>  [9] "10.1002/uog.15794"                   
#> [10] "10.14731/kjis.2013.12.11.2.295"

You can pass in the number of DOIs you want back (default is 10)

cr_r(2)
#> [1] "10.1038/scibx.2008.713"          "10.1007/978-3-642-41714-6_91906"

DOIs to PMIDs

UPDATE: as of 2014-12-23 the web API behind these functions is down - we'll update the package once the API is up again

Publishers can optionally provide links in the metadata they provide to Crossref for full text of the work, but that data is often missing. Find out more about it at http://tdmsupport.crossref.org/.

Get some DOIs for articles that provide full text, and that have CC-BY 3.0 licenses (i.e., more likely to actually be open)

out <-
  cr_works(filter = list(has_full_text = TRUE,
    license_url = "http://creativecommons.org/licenses/by/3.0/"))
(dois <- out$data$DOI)
#>  [1] "10.1063/1.4905711" "10.1063/1.4908245" "10.1063/1.4908158"
#>  [4] "10.1063/1.4908244" "10.1063/1.4913247" "10.1063/1.4914847"
#>  [7] "10.1063/1.4916217" "10.1063/1.4916677" "10.1063/1.4916696"
#> [10] "10.1063/1.4917283" "10.1063/1.4918284" "10.1063/1.4919707"
#> [13] "10.1063/1.4921711" "10.1063/1.4921771" "10.1063/1.4922319"
#> [16] "10.1063/1.4922934" "10.1063/1.4922006" "10.1063/1.4926610"
#> [19] "10.1063/1.4926838" "10.1063/1.4926914"

Then get URLs to full text content

(links <- cr_ft_links(dois[1]))
#> NULL

Then use those URLs to get full text

cr_ft_text(links, "xml")
#> <?xml version="1.0"?>
#> <article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://jats.nlm.nih.gov/publishing/1.1d1/xsd/JATS-journalpublishing1-mathml3.xsd" dtd-version="1.1d1">
#>   <front>
#>     <journal-meta>
#>       <journal-id journal-id-type="publisher-id">SV</journal-id>
#>       <journal-title-group>
#>         <journal-title>Shock and Vibration</journal-title>
#>       </journal-title-group>
#>       <issn pub-type="epub">1875-9203</issn>
#>       <issn pub-type="ppub">1070-9622</issn>
#>       <publisher>
#>         <publisher-name>Hindawi Publishing Corporation</publisher-name>
#>       </publisher>
#>     </journal-meta>
#> .................... cutoff
  • Please report any issues or bugs.
  • License: MIT
  • Get citation information for rcrossref in R by running citation(package = 'rcrossref')
  • Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.

This package is part of a richer suite called fulltext, along with several other packages, that provides the ability to search for and retrieve full text of open access scholarly articles.


News

rcrossref 0.6.0

  • Added to man files where appropriate new 10K max value for the offset parameter (#126)
  • Added to pkg level man file new rate limit headers included, and how users can get to those, via config=verbose() call (#124)
  • Better failure modes on input parameters, still more work to do surely (#101)
  • Now sleeping between tests to avoid making Crossref rate limit gatekeepers mad (#125)
  • cr_search and cr_search_free are now defunct. They were marked deprecated in previous version, and warned of defunctifying, and now they are defunct. Similar functionality can be done with e.g., cr_works() (#102)
  • crosscite is now defunct. The functionality of this function can be achieved with cr_cn() (#82)
  • cr_fundref is now defunct. Crossref changed their name fundref to funders, so we've changed our function, see cr_funders() (#83)
  • parameter sample maximum value is now 100, was previously 1000. documentation updated. (#118)
  • New filters has-clinical-trial-number and has-abstract added to the package, see ?filters for help (#120)

rcrossref 0.5.8

  • Added an RStudio Addin for searching for citations. See ?rcrossref for more. Addin authored by Hao Zhu @haozhu233 (#114)
  • New function cr_abstract() that tries to get an abstract via XML provided by Crossref - NOTE: an abstract is rarely available though (#116)
  • Fixed bug in cr_cn() where DOIs with no minting agency found were failing because we were previously stopping when no agency found. Now, we just assume Crossref and move on from there. (#117) thanks @dfalster !
  • Fix to cr_r() when number requested > 100. Actual fix is in cr_works(). Max for sample used to be 1000, asked this on the Crossref API forum, see https://github.com/CrossRef/rest-api-doc/issues/146 (#115)
  • Fix to cr_journals() in internal parsing, was failing in cases where ISSN array was of length zero

rcrossref 0.5.4

  • Improved documentation for cr_citation_count() to remove PLOS reference as the function isn't only for PLOS works (#108)
  • Changed use of dplyr::rbind_all() to dplyr::bind_rows() (#113)

rcrossref 0.5.2

  • User-agent string is passed with every request to Crossref with names and versions of this package, and its HTTP dependency packages: httr and curl (which httr depends on). Will potentially be useful to Crossref to know how many requests come from this R client (#100)
  • cr_search() and cr_search_free() use old Crossref web services, so are now marked deprecated, and will throw a deprecation message, but can still be used. They will both be defunct in v0.6 of this package (#99)
  • XML replaced with xml2 (#98)
  • httr::content() calls: all parse to text then parse content manually. in addition, encoding explicitly set to UTF-8 on httr::content() calls (#98)
  • Bug fix to cr_journals() - fix to parse correctly on some failed requests (#97) thanks @nkorf
  • Bug fix to cr_fundref()/cr_funders() - parsing wasn't working correctly in all cases

rcrossref 0.5.0

Skipped v0.4 to v0.5 because of many changes - as described below.

  • Support added for 'deep paging' with the newer Crossref search API. Two new params added to each function: cursor, which accepts a cursor alphanumeric string or the special *, which indicates that you want to initiate deep paging; cursor_max, which is not in the Crossref API, but just used here in this package to indicate where to stop - otherwise, you'd get all results, even if there were 70 million, for example. A new internal R6 class is used to make cursor requests easy (#77)
  • New function id_converter() to get a PMID from a DOI and vice versa (#49)
  • Added a Code of Conduct.
  • New function cr_types(), along with its low level equivalent cr_types_() for when you just want a list or json back (#92)
  • New suite of functions composing a low-level API for the Crossref search API. These functions only do data request, and return json or a list, and do not attempt to parse to a data.frame. The new functions: cr_funders_(), cr_journals_(), cr_licenses_(), cr_members_(), cr_prefixes_(), cr_types_(), cr_works_(). These functions are a bit faster, and aren't subject to parsing errors in the event of a change in the Crossref API. (#93)
  • Added new filter_names() and filter_details() functions to get information on what filters are available, the expected values, and what they mean.
  • Added documentation for new filter types, and added them to list of filters for filter_names() and filter_details() (#73)
  • cr_funders() alias added to cr_fundref() (#74)
  • Made note in docs that funders without IDs don't show up on the /funders route, in cr_funders() (#79)
  • Made note in docs that sample parameter ignored unless works=TRUE (#81)
  • Added note to docs that only what is returned in the API is what is searched when you search the Crossref API - that is, abstracts and full text aren't searched (#91)
  • cr_cn() now checks that the user supplied content-type is supported for the DOI minting agency associated with the DOI (#88) (thanks @njahn82)
  • Removed .progress parameter use internally where it wasn't applicable.
  • sample parameter dropped from cr_licenses().
  • cr_works() parsing changed. We now don't attempt to flatten nested arrays, but instead give them back as data.frame's nested within the main data.frame. For example, author often has many entries, so we return that as a single column, but indexing to that column gives back a data.frame with a row for each author, and N number of columns. Hopefully this doesn't break too much code downstream :)
  • Additional text added to the package level man file (?rcrossref) to explain: what you're actually searching when you search; deprecated and defunct functions; and explanation of high vs. low level API.
  • Fix to cr_members() to warn on error instead of stop during parsing (#68)
  • Fix to internal parser for cr_works() to output links data, for full text links (#70)
  • Minor fix in cr_cn() example that didn't work (#80)
  • Fixed parsing of affiliation data inside author object in Crossref search API returned data (#84)
  • Fixed parsing of funder award slot in Crossref search API returned data (#90)
  • crosscite() deprecated, will be removed in a future version of this package (#78)
  • cr_fundref() now has a deprecated message, and will be removed in the next version (#74)

rcrossref 0.3.4

  • Fixed problems related to httr v1 (#65)
  • Import all non-base R functions (#64)
  • The agency route used by the cr_agency() function was down; it is back up and fixed now (#63)

rcrossref 0.3.0

  • New function extract_pdf() to extract text from pdfs
  • New function cr_ft_links() to get links for full text content of an article (#10)
  • New function cr_ft_text() to get the full text content of an article. In addition, cr_ft_pdf(), cr_ft_plain(), and cr_ft_xml() are convenience functions that will get the format pdf, plain text, or xml, respectively. You can of course specify format in the cr_ft_text() function with the type parameter (#10) (#42)
  • Filled out more tests (#45)
  • No longer assign queried doi to the data.frame in cr_works(), which caused failure if a non-Crossref DOI included (#52)
  • pmid2doi() and doi2pmid() functions removed temporarily as the web service is down temporarily, but will be online again soon from Crossref (#48)

rcrossref 0.2.1

  • Fixes for man file examples. (#35)
  • cr_citation() is deprecated (still usable, but will be removed in a future version of the package). Use cr_cn() instead. (#34)

rcrossref 0.2.0

  • released to CRAN

Reference manual

It appears you don't have a PDF plugin for this browser. You can click here to download the reference manual.

install.packages("rcrossref")

0.7.0 by Scott Chamberlain, 5 months ago


https://github.com/ropensci/rcrossref


Report a bug at https://github.com/ropensci/rcrossref/issues


Browse source code at https://github.com/cran/rcrossref


Authors: Scott Chamberlain [aut, cre], Carl Boettiger [aut], Ted Hart [aut], Karthik Ram [aut]


Documentation:   PDF Manual  


MIT + file LICENSE license


Imports methods, utils, jsonlite, crul, xml2, plyr, bibtex, dplyr, R6, shiny, miniUI, stringr

Suggests roxygen2, testthat, knitr


Imported by fulltext.

Suggested by crminer, rcoreoa, roadoi.


See at CRAN