Last updated on Tue Dec 1 14:24:52 2020.

How to re-use this work

If you use these figures in your own work, please cite this website: https://doi.org/10.5281/zenodo.3470119

# Load required packages
library(magrittr)
library(dplyr)
library(jsonlite)
library(forcats)
library(ggplot2)
library(plotly)
library(here)
library(lubridate)

# PDBe search URL: entries whose title matches *SARS-CoV-2* and whose status is
# REL, restricted to the fields listed in `fl`, returned as JSON.
# NOTE(review): an earlier comment said two queries were needed (uppercase and
# lowercase titles), but only this one is defined -- confirm whether the title
# match is case-sensitive on the server side.
pdb_query <- paste0(
  "https://www.ebi.ac.uk/pdbe/search/pdb/select",
  "?q=title:*SARS-CoV-2*%20AND%20status:REL",
  "&fl=pdb_id,deposition_date,citation_year,title,experimental_method,",
  "resolution,organism_scientific_name,molecule_name,molecule_type",
  "&rows=1000000",
  "&wt=json"
)

# Helper: take a parsed search result, pull out the `docs` element nested under
# `response`, and return it as a tibble.
dig_up_data <- function(value) {
  as_tibble(value$response$docs)
}

# Download the search results and normalise the columns used further down.
# The work happens inside local() so that only `pdb_data` is created.
pdb_data <- local({
  # Fetch and parse the JSON response, then keep only the result records.
  # NOTE(review): with a single query result this bind_rows() looks redundant;
  # confirm before removing it.
  entries <- bind_rows(dig_up_data(fromJSON(pdb_query)))

  # Coerce each column to the type expected by the figures and the table
  entries <- mutate(
    entries,
    experimental_method = as_factor(as.character(experimental_method)),
    deposition_date = as_date(ymd_hms(deposition_date)),
    citation_year = as.integer(citation_year),
    molecule_name = as.character(molecule_name),
    molecule_type = as_factor(as.character(molecule_type))
  )

  # One row per PDB code, keeping every column of the retained row
  distinct(entries, pdb_id, .keep_all = TRUE)
})

About this section

With the ongoing COVID-19 pandemic, new structures of proteins from SARS-CoV-2 are deposited to the PDB at a fast pace. The visualizations presented here won't help curb the pandemic (and are not medical advice, obviously), but I hope they will make curious and helpless minds (like myself) get a feel for the research effort currently being deployed against it.

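The following resources are a lot more relevant and actionable:

- Coronavirus Structural Task Force (https://insidecorona.net/)
- PDBe COVID-19 Data Portal (https://www.ebi.ac.uk/pdbe/covid-19)
- Crowdfight COVID-19 (https://crowdfightcovid19.org/)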

SARS-CoV-2 structures by deposition date

All figures are interactive (you can zoom in, and hovering over elements will show more information).

# Bar chart counting PDB entries for each deposition date
sars_cov2_structures_year <- ggplot(data = pdb_data) +
  geom_bar(mapping = aes(x = deposition_date)) +
  labs(
    title = "Structures of SARS-CoV-2 by deposition date",
    x = "Deposition date",
    y = "Number of PDB entries"
  ) +
  theme_bw()

# Convert the static figure to its interactive plotly version for display
ggplotly(sars_cov2_structures_year)

The two noticeable spikes (68 structures deposited on March 15th and 13 structures deposited on April 7th) come from a fragment screening experiment performed at Diamond Light Source. All these structures are the same protein (main protease) in complex with candidate molecules for drug design.

Download figure in SVG format

SARS-CoV-2 structures by experimental method

# Bar chart counting PDB entries for each experimental method; bars are also
# coloured by method, and the legend is titled accordingly.
sars_cov2_structures_method <- ggplot(data = pdb_data) +
  geom_bar(
    mapping = aes(x = experimental_method, fill = experimental_method)
  ) +
  guides(fill = guide_legend(title = "Experimental method")) +
  labs(
    title = "Structures of SARS-CoV-2 by experimental method",
    x = "",
    y = "Number of PDB entries"
  ) +
  theme_bw()

# Convert the static figure to its interactive plotly version for display
ggplotly(sars_cov2_structures_method)

Download figure in SVG format

Dataset

The graphs presented above are derived from the following dataset:

# Build the display table: sort by citation year (descending), then keep and
# rename the four columns shown to the reader.
pdb_table <- select(
  arrange(pdb_data, desc(citation_year)),
  `PDB code` = pdb_id,
  `Citation year` = citation_year,
  `Experimental method` = experimental_method,
  Title = title
)

# Print the table
pdb_table

Download raw dataset in JSON format

LS0tCnRpdGxlOiAiU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIgotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQobWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0UpCmBgYAoKKipMYXN0IHVwZGF0ZWQgb24gYHIgZGF0ZSgpYC4qKgoKIyMgSG93IHRvIHJlLXVzZSB0aGlzIHdvcmsKCklmIHlvdSB1c2UgdGhlc2UgZmlndXJlcyBpbiB5b3VyIG93biB3b3JrLCBwbGVhc2UgY2l0ZSB0aGlzIHdlYnNpdGU6CjxodHRwczovL2RvaS5vcmcvMTAuNTI4MS96ZW5vZG8uMzQ3MDExOT4KCmBgYHtyIExvYWQgcGFja2FnZXMsIGRvd25sb2FkIGRhdGEgYW5kIHByZXBhcmUgZGF0YXNldH0KIyBMb2FkIHJlcXVpcmVkIHBhY2thZ2VzCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoanNvbmxpdGUpCmxpYnJhcnkoZm9yY2F0cykKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeShoZXJlKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKCiMgV2UgbmVlZCB0d28gcXVlcmllczogb25lIGZvciB1cHBlcmNhc2UgdGl0bGVzLCB0aGUgb3RoZXIgZm9yIGxvd2VyY2FzZSBvbmVzCnBkYl9xdWVyeSA8LSAnaHR0cHM6Ly93d3cuZWJpLmFjLnVrL3BkYmUvc2VhcmNoL3BkYi9zZWxlY3Q/cT10aXRsZToqU0FSUy1Db1YtMiolMjBBTkQlMjBzdGF0dXM6UkVMJmZsPXBkYl9pZCxkZXBvc2l0aW9uX2RhdGUsY2l0YXRpb25feWVhcix0aXRsZSxleHBlcmltZW50YWxfbWV0aG9kLHJlc29sdXRpb24sb3JnYW5pc21fc2NpZW50aWZpY19uYW1lLG1vbGVjdWxlX25hbWUsbW9sZWN1bGVfdHlwZSZyb3dzPTEwMDAwMDAmd3Q9anNvbicKCiMgVGhpcyBpcyBhIGhlbHBlciBwaXBlbGluZSB0byBleHRyYWN0IGRhdGEKZGlnX3VwX2RhdGEgPC0gLiAlPiUKICAgIC4kcmVzcG9uc2UgJT4lCiAgICAuJGRvY3MgJT4lCiAgICBhc190aWJibGUoKQoKIyBRdWVyeSB0aGUgUERCIGFuZCBjbGVhbiB1cCBkYXRhCnBkYl9kYXRhIDwtIHBkYl9xdWVyeSAlPiUgCiAgICBmcm9tSlNPTigpICU+JSAKICAgIGRpZ191cF9kYXRhKCkgJT4lIAogICAgYmluZF9yb3dzKCkgJT4lIAogICAgbXV0YXRlKAogICAgICBleHBlcmltZW50YWxfbWV0aG9kICAgPSBhc19mYWN0b3IoYXMuY2hhcmFjdGVyKGV4cGVyaW1lbnRhbF9tZXRob2QpKSwKICAgICAgZGVwb3NpdGlvbl9kYXRlICAgICAgID0gYXNfZGF0ZSh5bWRfaG1zKGRlcG9zaXRpb25fZGF0ZSkpLAogICAgICBjaXRhdGlvbl95ZWFyICAgICAgICAgPSBhcy5pbnRlZ2VyKGNpdGF0aW9uX3llYXIpLAogICAgICBtb2xlY3VsZV9uYW1lICAgICAgICAgPSBhcy5jaGFyYWN0ZXIobW9sZWN1bGVfbmFtZSksCiAgICAgIG1vbGVjdWxlX3R5cGUgICAgICAgICA9IGFzX2ZhY3Rvcihhcy5jaGFyYWN0ZXIobW9sZWN1bGVfdHlwZSkpCiAgICAgICkgJT4lIAogICAgZGlzdGluY3QocGRiX2lkLCAua2VlcF9hbGwgPSBUUlVFKQpgYGAKCiMjIEFib3V0IHRoaXMgc2Vj
dGlvbgoKV2l0aCB0aGUgb25nb2luZyBbQ09WSUQtMTkKcGFuZGVtaWNdKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0NPVklELTE5X3BhbmRlbWljKSwgbmV3IHN0cnVjdHVyZXMgb2YKcHJvdGVpbnMgZnJvbQpbU0FSUy1Db1YtMl0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvU2V2ZXJlX2FjdXRlX3Jlc3BpcmF0b3J5X3N5bmRyb21lX2Nvcm9uYXZpcnVzXzIpIGFyZSBkZXBvc2l0ZWQgdG8gdGhlIFBEQiBhdCBhIGZhc3QgcGFjZS4gVGhlIHZpc3VhbGl6YXRpb25zIHByZXNlbnRlZCBoZXJlCndvbid0IGhlbHAgY3VyYiB0aGUgcGFuZGVtaWMgKGFuZCBhcmUgbm90IG1lZGljYWwgYWR2aWNlLCBvYnZpb3VzbHkpLCBidXQgSSBob3BlCnRoZXkgd2lsbCBtYWtlIGN1cmlvdXMgYW5kIGhlbHBsZXNzIG1pbmRzIChsaWtlIG15c2VsZikgZ2V0IGEgZmVlbCBmb3IgdGhlCnJlc2VhcmNoIGVmZm9ydCBjdXJyZW50bHkgYmVpbmcgZGVwbG95ZWQgYWdhaW5zdCBpdC4KClRoZSBmb2xsb3dpbmcgcmVzb3VyY2VzIGFyZSBhIGxvdCBtb3JlIHJlbGV2YW50IGFuZCBhY3Rpb25hYmxlOgoKLSBbQ29yb25hdmlydXMgU3RydWN0dXJhbCBUYXNrIEZvcmNlXShodHRwczovL2luc2lkZWNvcm9uYS5uZXQvKQotIFtQREJlIENPVklELTE5IERhdGEgUG9ydGFsXShodHRwczovL3d3dy5lYmkuYWMudWsvcGRiZS9jb3ZpZC0xOSkKLSBbQ3Jvd2RmaWdodCBDT1ZJRC0xOV0oaHR0cHM6Ly9jcm93ZGZpZ2h0Y292aWQxOS5vcmcvKQoKIyMgU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIGJ5IGRlcG9zaXRpb24gZGF0ZQoKQWxsIGZpZ3VyZXMgYXJlIGludGVyYWN0aXZlICh5b3UgY2FuIHpvb20gaW4sIGFuZCBob3ZlcmluZyBvdmVyIGVsZW1lbnRzIHdpbGwKc2hvdyBtb3JlIGluZm9ybWF0aW9uKS4KCmBgYHtyIFNBUlMtQ29WLTIgc3RydWN0dXJlcyBieSB5ZWFyfQpzYXJzX2NvdjJfc3RydWN0dXJlc195ZWFyIDwtIHBkYl9kYXRhICU+JSAKICAgIGdncGxvdCgpICsKICAgIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IGRlcG9zaXRpb25fZGF0ZSkpICsKICAgIGdndGl0bGUoIlN0cnVjdHVyZXMgb2YgU0FSUy1Db1YtMiBieSBkZXBvc2l0aW9uIGRhdGUiKSArCiAgICB4bGFiKCJEZXBvc2l0aW9uIGRhdGUiKSArCiAgICB5bGFiKCJOdW1iZXIgb2YgUERCIGVudHJpZXMiKSArCiAgICB0aGVtZV9idygpCmdncGxvdGx5KHNhcnNfY292Ml9zdHJ1Y3R1cmVzX3llYXIpCmBgYAoKVGhlIHR3byBub3RpY2VhYmxlIHNwaWtlcyAoNjggc3RydWN0dXJlcyBkZXBvc2l0ZWQgb24gTWFyY2ggMTV0aCBhbmQgMTMKc3RydWN0dXJlcyBkZXBvc2l0ZWQgb24gQXByaWwgN3RoKSBjb21lIGZyb20gW2EgZnJhZ21lbnQgc2NyZWVuaW5nCmV4cGVyaW1lbnRdKGh0dHBzOi8vd3d3LmRpYW1vbmQuYWMudWsvY292aWQtMTkvZm9yLXNjaWVudGlzdHMvTWFpbi1wcm90ZWFzZS1zdHJ1Y3R1cmUtYW5kLVhDaGVtLmh0bWwpCnBlcmZvcm1lZCBhdCBEaWFtb25kIExpZ2h0
IFNvdXJjZS4gQWxsIHRoZXNlIHN0cnVjdHVyZXMgYXJlIHRoZSBzYW1lIHByb3RlaW4KKG1haW4gcHJvdGVhc2UpIGluIGNvbXBsZXggd2l0aCBjYW5kaWRhdGUgbW9sZWN1bGVzIGZvciBkcnVnIGRlc2lnbi4KCmBgYHtyIFNhdmUgU1ZHIGZpbGUgb2YgeWVhciBncmFwaCwgaW5jbHVkZT1GQUxTRX0KIyBTYXZlIGZpZ3VyZSBmb3IgZG93bmxvYWQKaWYgKCFkaXIuZXhpc3RzKGhlcmUoImZpZ3VyZXMiKSkpIHsKICAgIGRpci5jcmVhdGUoaGVyZSgiZmlndXJlcyIpKQp9Cmdnc2F2ZShmaWxlbmFtZSA9ICJzYXJzLWNvdjItc3RydWN0dXJlcy1ieS15ZWFyLnN2ZyIsCiAgICAgICBwbG90ID0gc2Fyc19jb3YyX3N0cnVjdHVyZXNfeWVhciwKICAgICAgIGRldmljZSA9ICJzdmciLAogICAgICAgcGF0aCA9IGhlcmUoImZpZ3VyZXMiKSkKYGBgCgpbKipEb3dubG9hZCBmaWd1cmUgaW4gU1ZHIGZvcm1hdCoqXShmaWd1cmVzL3NhcnMtY292Mi1zdHJ1Y3R1cmVzLWJ5LXllYXIuc3ZnKQoKIyMgU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIGJ5IGV4cGVyaW1lbnRhbCBtZXRob2QKCmBgYHtyIFNBUlMtQ29WLTIgc3RydWN0dXJlcyBieSBleHBlcmltZW50YWwgbWV0aG9kfQpzYXJzX2NvdjJfc3RydWN0dXJlc19tZXRob2QgPC0gcGRiX2RhdGEgJT4lIAogICAgZ2dwbG90KCkgKwogICAgZ2VvbV9iYXIobWFwcGluZyA9IGFlcyh4ID0gZXhwZXJpbWVudGFsX21ldGhvZCwgZmlsbCA9IGV4cGVyaW1lbnRhbF9tZXRob2QpKSArCiAgICBndWlkZXMoZmlsbCA9IGd1aWRlX2xlZ2VuZCh0aXRsZSA9ICJFeHBlcmltZW50YWwgbWV0aG9kIikpICsKICAgIGdndGl0bGUoIlN0cnVjdHVyZXMgb2YgU0FSUy1Db1YtMiBieSBleHBlcmltZW50YWwgbWV0aG9kIikgKwogICAgeGxhYigiIikgKwogICAgeWxhYigiTnVtYmVyIG9mIFBEQiBlbnRyaWVzIikgKwogICAgdGhlbWVfYncoKQpnZ3Bsb3RseShzYXJzX2NvdjJfc3RydWN0dXJlc19tZXRob2QpCmBgYAoKYGBge3IgU2F2ZSBTVkcgZmlsZSBvZiBtZXRob2QgZ3JhcGgsIGluY2x1ZGU9RkFMU0V9CiMgU2F2ZSBmaWd1cmUgZm9yIGRvd25sb2FkCmlmICghZGlyLmV4aXN0cyhoZXJlKCJmaWd1cmVzIikpKSB7CiAgICBkaXIuY3JlYXRlKGhlcmUoImZpZ3VyZXMiKSkKfQpnZ3NhdmUoZmlsZW5hbWUgPSAic2Fycy1jb3YyLXN0cnVjdHVyZXMtYnktbWV0aG9kLnN2ZyIsCiAgICAgICBwbG90ID0gc2Fyc19jb3YyX3N0cnVjdHVyZXNfbWV0aG9kLAogICAgICAgZGV2aWNlID0gInN2ZyIsCiAgICAgICBwYXRoID0gaGVyZSgiZmlndXJlcyIpKQpgYGAKClsqKkRvd25sb2FkIGZpZ3VyZSBpbiBTVkcgZm9ybWF0KipdKGZpZ3VyZXMvc2Fycy1jb3YyLXN0cnVjdHVyZXMtYnktbWV0aG9kLnN2ZykKCgojIyBEYXRhc2V0CgpUaGUgZ3JhcGhzIHByZXNlbnRlZCBhYm92ZSBhcmUgZGVyaXZlZCBmcm9tIHRoZSBmb2xsb3dpbmcgZGF0YXNldDoKCmBgYHtyIEVudGlyZSBkYXRhc2V0fQojIEZvcm1hdCB0YWJsZSBmb3IgZGlzcGxh
eQpwZGJfdGFibGUgPC0gcGRiX2RhdGEgJT4lIAogICAgYXJyYW5nZShkZXNjKGNpdGF0aW9uX3llYXIpKSAlPiUgCiAgICBzZWxlY3QoYFBEQiBjb2RlYCA9IHBkYl9pZCwKICAgICAgICAgICBgQ2l0YXRpb24geWVhcmAgPSBjaXRhdGlvbl95ZWFyLAogICAgICAgICAgIGBFeHBlcmltZW50YWwgbWV0aG9kYCA9IGV4cGVyaW1lbnRhbF9tZXRob2QsCiAgICAgICAgICAgVGl0bGUgPSB0aXRsZSkKcGRiX3RhYmxlCmBgYAoKYGBge3IgU2F2ZSBkYXRhc2V0IGFzIEpTT04sIGluY2x1ZGU9RkFMU0V9CiMgU2F2ZSBkYXRhc2V0IGZvciBkb3dubG9hZAppZiAoIWRpci5leGlzdHMoaGVyZSgiZGF0YXNldHMiKSkpIHsKICAgIGRpci5jcmVhdGUoaGVyZSgiZGF0YXNldHMiKSkKfQoKd3JpdGVfanNvbihwZGJfZGF0YSwgaGVyZSgiZGF0YXNldHMiLCAic2Fycy1jb3YyLXN0cnVjdHVyZXMuanNvbiIpKQpgYGAKClsqKkRvd25sb2FkIHJhdyBkYXRhc2V0IGluIEpTT04gZm9ybWF0KipdKGRhdGFzZXRzL3NhcnMtY292Mi1zdHJ1Y3R1cmVzLmpzb24pCg==