Last updated on Sun Feb 11 22:56:11 2024.

How to re-use this work

If you use these figures in your own work, please cite this website: https://doi.org/10.5281/zenodo.3470119

# Load required packages
library(magrittr)
library(dplyr)
library(jsonlite)
library(forcats)
library(ggplot2)
library(plotly)
library(here)
library(lubridate)

# Search URL sent to the PDBe search service. A single query is defined: it
# selects released entries (status:REL) whose title matches the wildcard
# pattern *SARS-CoV-2*. The URL is assembled from its parts so that the list of
# requested fields (the `fl` parameter) is easy to read and edit.
pdb_query <- paste0(
    "https://www.ebi.ac.uk/pdbe/search/pdb/select",
    "?q=title:*SARS-CoV-2*%20AND%20status:REL",
    "&fl=",
    paste(
        c(
            "pdb_id",
            "deposition_date",
            "citation_year",
            "title",
            "experimental_method",
            "resolution",
            "organism_scientific_name",
            "molecule_name",
            "molecule_type"
        ),
        collapse = ","
    ),
    "&rows=1000000",
    "&wt=json"
)

# Helper that extracts the records from a parsed search response: it takes the
# `docs` element nested under `response` and returns it as a tibble.
dig_up_data <- function(value) {
    as_tibble(value$response$docs)
}

# Download the search results and tidy them into one row per PDB code
pdb_data <- dig_up_data(fromJSON(pdb_query)) %>%
    # Only one tibble is supplied here, so there is nothing else to combine
    bind_rows() %>%
    # Categorical columns: coerce to character, then convert to factors
    mutate(
        experimental_method = as_factor(as.character(experimental_method)),
        molecule_type = as_factor(as.character(molecule_type))
    ) %>%
    # Parse the deposition timestamp and convert it to a date
    mutate(deposition_date = as_date(ymd_hms(deposition_date))) %>%
    # Remaining scalar columns: integer year and character molecule name
    mutate(
        citation_year = as.integer(citation_year),
        molecule_name = as.character(molecule_name)
    ) %>%
    # Keep a single row per pdb_id while retaining every column
    distinct(pdb_id, .keep_all = TRUE)

About this section

With the ongoing COVID-19 pandemic, new structures of proteins from SARS-CoV-2 are deposited to the PDB at a fast pace. The visualizations presented here won’t help curb the pandemic (and are not medical advice, obviously), but I hope they will help curious and helpless minds (like mine) get a feel for the research effort currently being deployed against it.

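The following resources are a lot more relevant and actionable:

- Coronavirus Structural Task Force: https://insidecorona.net/
- PDBe COVID-19 Data Portal: https://www.ebi.ac.uk/pdbe/covid-19
- Crowdfight COVID-19: https://crowdfightcovid19.org/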

SARS-CoV-2 structures by deposition date

All figures are interactive (you can zoom in, and hovering over elements will show more information).

# Bar chart counting PDB entries per deposition date, then rendered as an
# interactive plotly widget
sars_cov2_structures_year <- ggplot(data = pdb_data) +
    geom_bar(aes(x = deposition_date)) +
    labs(
        title = "Structures of SARS-CoV-2 by deposition date",
        x = "Deposition date",
        y = "Number of PDB entries"
    ) +
    theme_bw()
ggplotly(sars_cov2_structures_year)

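Unusually large numbers of depositions on a single day (for example, 68 structures deposited on March 15th and 14 structures deposited on April 7th) come from large-scale fragment screening experiments performed at Diamond Light Source:

- main protease: https://www.diamond.ac.uk/covid-19/for-scientists/Main-protease-structure-and-XChem.html
- macro domain: https://www.diamond.ac.uk/covid-19/for-scientists/NSP3-macrodomain-structure-and-XChem.html
- helicase: https://www.diamond.ac.uk/covid-19/for-scientists/NSP13-Helicase-Structure-and-XChem.html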

Download figure in SVG format

SARS-CoV-2 structures by experimental method

# Bar chart counting PDB entries per experimental method (bars are also
# coloured by method), then rendered as an interactive plotly widget
sars_cov2_structures_method <- ggplot(data = pdb_data) +
    geom_bar(aes(x = experimental_method, fill = experimental_method)) +
    guides(fill = guide_legend(title = "Experimental method")) +
    labs(
        title = "Structures of SARS-CoV-2 by experimental method",
        x = "",
        y = "Number of PDB entries"
    ) +
    theme_bw()
ggplotly(sars_cov2_structures_method)

Download figure in SVG format

Dataset

The graphs presented above are derived from the following dataset:

# Build the table shown on the page: four columns with human-readable headers,
# rows ordered by citation year in descending order
pdb_table <- pdb_data %>%
    select(
        `PDB code` = pdb_id,
        `Citation year` = citation_year,
        `Experimental method` = experimental_method,
        Title = title
    ) %>%
    arrange(desc(`Citation year`))
pdb_table

Download raw dataset in JSON format

LS0tCnRpdGxlOiAiU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIgotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQobWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0UpCmBgYAoKKipMYXN0IHVwZGF0ZWQgb24gYHIgZGF0ZSgpYC4qKgoKIyMgSG93IHRvIHJlLXVzZSB0aGlzIHdvcmsKCklmIHlvdSB1c2UgdGhlc2UgZmlndXJlcyBpbiB5b3VyIG93biB3b3JrLCBwbGVhc2UgY2l0ZSB0aGlzIHdlYnNpdGU6CjxodHRwczovL2RvaS5vcmcvMTAuNTI4MS96ZW5vZG8uMzQ3MDExOT4KCmBgYHtyIExvYWQgcGFja2FnZXMsIGRvd25sb2FkIGRhdGEgYW5kIHByZXBhcmUgZGF0YXNldH0KIyBMb2FkIHJlcXVpcmVkIHBhY2thZ2VzCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoanNvbmxpdGUpCmxpYnJhcnkoZm9yY2F0cykKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeShoZXJlKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKCiMgV2UgbmVlZCB0d28gcXVlcmllczogb25lIGZvciB1cHBlcmNhc2UgdGl0bGVzLCB0aGUgb3RoZXIgZm9yIGxvd2VyY2FzZSBvbmVzCnBkYl9xdWVyeSA8LSAnaHR0cHM6Ly93d3cuZWJpLmFjLnVrL3BkYmUvc2VhcmNoL3BkYi9zZWxlY3Q/cT10aXRsZToqU0FSUy1Db1YtMiolMjBBTkQlMjBzdGF0dXM6UkVMJmZsPXBkYl9pZCxkZXBvc2l0aW9uX2RhdGUsY2l0YXRpb25feWVhcix0aXRsZSxleHBlcmltZW50YWxfbWV0aG9kLHJlc29sdXRpb24sb3JnYW5pc21fc2NpZW50aWZpY19uYW1lLG1vbGVjdWxlX25hbWUsbW9sZWN1bGVfdHlwZSZyb3dzPTEwMDAwMDAmd3Q9anNvbicKCiMgVGhpcyBpcyBhIGhlbHBlciBwaXBlbGluZSB0byBleHRyYWN0IGRhdGEKZGlnX3VwX2RhdGEgPC0gLiAlPiUKICAgIC4kcmVzcG9uc2UgJT4lCiAgICAuJGRvY3MgJT4lCiAgICBhc190aWJibGUoKQoKIyBRdWVyeSB0aGUgUERCIGFuZCBjbGVhbiB1cCBkYXRhCnBkYl9kYXRhIDwtIHBkYl9xdWVyeSAlPiUgCiAgICBmcm9tSlNPTigpICU+JSAKICAgIGRpZ191cF9kYXRhKCkgJT4lIAogICAgYmluZF9yb3dzKCkgJT4lIAogICAgbXV0YXRlKAogICAgICBleHBlcmltZW50YWxfbWV0aG9kICAgPSBhc19mYWN0b3IoYXMuY2hhcmFjdGVyKGV4cGVyaW1lbnRhbF9tZXRob2QpKSwKICAgICAgZGVwb3NpdGlvbl9kYXRlICAgICAgID0gYXNfZGF0ZSh5bWRfaG1zKGRlcG9zaXRpb25fZGF0ZSkpLAogICAgICBjaXRhdGlvbl95ZWFyICAgICAgICAgPSBhcy5pbnRlZ2VyKGNpdGF0aW9uX3llYXIpLAogICAgICBtb2xlY3VsZV9uYW1lICAgICAgICAgPSBhcy5jaGFyYWN0ZXIobW9sZWN1bGVfbmFtZSksCiAgICAgIG1vbGVjdWxlX3R5cGUgICAgICAgICA9IGFzX2ZhY3Rvcihhcy5jaGFyYWN0ZXIobW9sZWN1bGVfdHlwZSkpCiAgICAgICkgJT4lIAogICAgZGlzdGluY3QocGRiX2lkLCAua2VlcF9hbGwgPSBUUlVFKQpgYGAKCiMjIEFib3V0IHRoaXMgc2Vj
dGlvbgoKV2l0aCB0aGUgb25nb2luZyBbQ09WSUQtMTkKcGFuZGVtaWNdKGh0dHBzOi8vZW4ud2lraXBlZGlhLm9yZy93aWtpL0NPVklELTE5X3BhbmRlbWljKSwgbmV3IHN0cnVjdHVyZXMgb2YKcHJvdGVpbnMgZnJvbQpbU0FSUy1Db1YtMl0oaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvU2V2ZXJlX2FjdXRlX3Jlc3BpcmF0b3J5X3N5bmRyb21lX2Nvcm9uYXZpcnVzXzIpIGFyZSBkZXBvc2l0ZWQgdG8gdGhlIFBEQiBhdCBhIGZhc3QgcGFjZS4gVGhlIHZpc3VhbGl6YXRpb25zIHByZXNlbnRlZCBoZXJlCndvbid0IGhlbHAgY3VyYiB0aGUgcGFuZGVtaWMgKGFuZCBhcmUgbm90IG1lZGljYWwgYWR2aWNlLCBvYnZpb3VzbHkpLCBidXQgSSBob3BlCnRoZXkgd2lsbCBtYWtlIGN1cmlvdXMgYW5kIGhlbHBsZXNzIG1pbmRzIChsaWtlIG15c2VsZikgZ2V0IGEgZmVlbCBmb3IgdGhlCnJlc2VhcmNoIGVmZm9ydCBjdXJyZW50bHkgYmVpbmcgZGVwbG95ZWQgYWdhaW5zdCBpdC4KClRoZSBmb2xsb3dpbmcgcmVzb3VyY2VzIGFyZSBhIGxvdCBtb3JlIHJlbGV2YW50IGFuZCBhY3Rpb25hYmxlOgoKLSBbQ29yb25hdmlydXMgU3RydWN0dXJhbCBUYXNrIEZvcmNlXShodHRwczovL2luc2lkZWNvcm9uYS5uZXQvKQotIFtQREJlIENPVklELTE5IERhdGEgUG9ydGFsXShodHRwczovL3d3dy5lYmkuYWMudWsvcGRiZS9jb3ZpZC0xOSkKLSBbQ3Jvd2RmaWdodCBDT1ZJRC0xOV0oaHR0cHM6Ly9jcm93ZGZpZ2h0Y292aWQxOS5vcmcvKQoKIyMgU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIGJ5IGRlcG9zaXRpb24gZGF0ZQoKQWxsIGZpZ3VyZXMgYXJlIGludGVyYWN0aXZlICh5b3UgY2FuIHpvb20gaW4sIGFuZCBob3ZlcmluZyBvdmVyIGVsZW1lbnRzIHdpbGwKc2hvdyBtb3JlIGluZm9ybWF0aW9uKS4KCmBgYHtyIFNBUlMtQ29WLTIgc3RydWN0dXJlcyBieSB5ZWFyfQpzYXJzX2NvdjJfc3RydWN0dXJlc195ZWFyIDwtIHBkYl9kYXRhICU+JSAKICAgIGdncGxvdCgpICsKICAgIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IGRlcG9zaXRpb25fZGF0ZSkpICsKICAgIGdndGl0bGUoIlN0cnVjdHVyZXMgb2YgU0FSUy1Db1YtMiBieSBkZXBvc2l0aW9uIGRhdGUiKSArCiAgICB4bGFiKCJEZXBvc2l0aW9uIGRhdGUiKSArCiAgICB5bGFiKCJOdW1iZXIgb2YgUERCIGVudHJpZXMiKSArCiAgICB0aGVtZV9idygpCmdncGxvdGx5KHNhcnNfY292Ml9zdHJ1Y3R1cmVzX3llYXIpCmBgYAoKTm90aWNlYWJsZSB2ZXJ5IGxhcmdlIG51bWJlcnMgb2YgZGVwb3NpdGlvbnMgb24gYSBzYW1lIGRheSAoZm9yIGV4YW1wbGUsIDY4CnN0cnVjdHVyZXMgZGVwb3NpdGVkIG9uIE1hcmNoIDE1dGggYW5kIDE0IHN0cnVjdHVyZXMgZGVwb3NpdGVkIG9uIEFwcmlsIDd0aCkKY29tZSBmcm9tIGxhcmdlLXNjYWxlIGZyYWdtZW50IHNjcmVlbmluZyBleHBlcmltZW50cyBwZXJmb3JtZWQgYXQgRGlhbW9uZCBMaWdodApTb3VyY2U6CgotIFttYWluIHByb3RlYXNlXShodHRwczov
L3d3dy5kaWFtb25kLmFjLnVrL2NvdmlkLTE5L2Zvci1zY2llbnRpc3RzL01haW4tcHJvdGVhc2Utc3RydWN0dXJlLWFuZC1YQ2hlbS5odG1sKQotIFttYWNybyBkb21haW5dKGh0dHBzOi8vd3d3LmRpYW1vbmQuYWMudWsvY292aWQtMTkvZm9yLXNjaWVudGlzdHMvTlNQMy1tYWNyb2RvbWFpbi1zdHJ1Y3R1cmUtYW5kLVhDaGVtLmh0bWwpCi0gW2hlbGljYXNlXShodHRwczovL3d3dy5kaWFtb25kLmFjLnVrL2NvdmlkLTE5L2Zvci1zY2llbnRpc3RzL05TUDEzLUhlbGljYXNlLVN0cnVjdHVyZS1hbmQtWENoZW0uaHRtbCkKCmBgYHtyIFNhdmUgU1ZHIGZpbGUgb2YgeWVhciBncmFwaCwgaW5jbHVkZT1GQUxTRX0KIyBTYXZlIGZpZ3VyZSBmb3IgZG93bmxvYWQKaWYgKCFkaXIuZXhpc3RzKGhlcmUoImZpZ3VyZXMiKSkpIHsKICAgIGRpci5jcmVhdGUoaGVyZSgiZmlndXJlcyIpKQp9Cmdnc2F2ZShmaWxlbmFtZSA9ICJzYXJzLWNvdjItc3RydWN0dXJlcy1ieS15ZWFyLnN2ZyIsCiAgICAgICBwbG90ID0gc2Fyc19jb3YyX3N0cnVjdHVyZXNfeWVhciwKICAgICAgIGRldmljZSA9ICJzdmciLAogICAgICAgcGF0aCA9IGhlcmUoImZpZ3VyZXMiKSkKYGBgCgpbKipEb3dubG9hZCBmaWd1cmUgaW4gU1ZHIGZvcm1hdCoqXShmaWd1cmVzL3NhcnMtY292Mi1zdHJ1Y3R1cmVzLWJ5LXllYXIuc3ZnKQoKIyMgU0FSUy1Db1YtMiBzdHJ1Y3R1cmVzIGJ5IGV4cGVyaW1lbnRhbCBtZXRob2QKCmBgYHtyIFNBUlMtQ29WLTIgc3RydWN0dXJlcyBieSBleHBlcmltZW50YWwgbWV0aG9kfQpzYXJzX2NvdjJfc3RydWN0dXJlc19tZXRob2QgPC0gcGRiX2RhdGEgJT4lIAogICAgZ2dwbG90KCkgKwogICAgZ2VvbV9iYXIobWFwcGluZyA9IGFlcyh4ID0gZXhwZXJpbWVudGFsX21ldGhvZCwgZmlsbCA9IGV4cGVyaW1lbnRhbF9tZXRob2QpKSArCiAgICBndWlkZXMoZmlsbCA9IGd1aWRlX2xlZ2VuZCh0aXRsZSA9ICJFeHBlcmltZW50YWwgbWV0aG9kIikpICsKICAgIGdndGl0bGUoIlN0cnVjdHVyZXMgb2YgU0FSUy1Db1YtMiBieSBleHBlcmltZW50YWwgbWV0aG9kIikgKwogICAgeGxhYigiIikgKwogICAgeWxhYigiTnVtYmVyIG9mIFBEQiBlbnRyaWVzIikgKwogICAgdGhlbWVfYncoKQpnZ3Bsb3RseShzYXJzX2NvdjJfc3RydWN0dXJlc19tZXRob2QpCmBgYAoKYGBge3IgU2F2ZSBTVkcgZmlsZSBvZiBtZXRob2QgZ3JhcGgsIGluY2x1ZGU9RkFMU0V9CiMgU2F2ZSBmaWd1cmUgZm9yIGRvd25sb2FkCmlmICghZGlyLmV4aXN0cyhoZXJlKCJmaWd1cmVzIikpKSB7CiAgICBkaXIuY3JlYXRlKGhlcmUoImZpZ3VyZXMiKSkKfQpnZ3NhdmUoZmlsZW5hbWUgPSAic2Fycy1jb3YyLXN0cnVjdHVyZXMtYnktbWV0aG9kLnN2ZyIsCiAgICAgICBwbG90ID0gc2Fyc19jb3YyX3N0cnVjdHVyZXNfbWV0aG9kLAogICAgICAgZGV2aWNlID0gInN2ZyIsCiAgICAgICBwYXRoID0gaGVyZSgiZmlndXJlcyIpKQpgYGAKClsqKkRvd25sb2FkIGZpZ3VyZSBpbiBTVkcgZm9ybWF0
KipdKGZpZ3VyZXMvc2Fycy1jb3YyLXN0cnVjdHVyZXMtYnktbWV0aG9kLnN2ZykKCgojIyBEYXRhc2V0CgpUaGUgZ3JhcGhzIHByZXNlbnRlZCBhYm92ZSBhcmUgZGVyaXZlZCBmcm9tIHRoZSBmb2xsb3dpbmcgZGF0YXNldDoKCmBgYHtyIEVudGlyZSBkYXRhc2V0fQojIEZvcm1hdCB0YWJsZSBmb3IgZGlzcGxheQpwZGJfdGFibGUgPC0gcGRiX2RhdGEgJT4lIAogICAgYXJyYW5nZShkZXNjKGNpdGF0aW9uX3llYXIpKSAlPiUgCiAgICBzZWxlY3QoYFBEQiBjb2RlYCA9IHBkYl9pZCwKICAgICAgICAgICBgQ2l0YXRpb24geWVhcmAgPSBjaXRhdGlvbl95ZWFyLAogICAgICAgICAgIGBFeHBlcmltZW50YWwgbWV0aG9kYCA9IGV4cGVyaW1lbnRhbF9tZXRob2QsCiAgICAgICAgICAgVGl0bGUgPSB0aXRsZSkKcGRiX3RhYmxlCmBgYAoKYGBge3IgU2F2ZSBkYXRhc2V0IGFzIEpTT04sIGluY2x1ZGU9RkFMU0V9CiMgU2F2ZSBkYXRhc2V0IGZvciBkb3dubG9hZAppZiAoIWRpci5leGlzdHMoaGVyZSgiZGF0YXNldHMiKSkpIHsKICAgIGRpci5jcmVhdGUoaGVyZSgiZGF0YXNldHMiKSkKfQoKd3JpdGVfanNvbihwZGJfZGF0YSwgaGVyZSgiZGF0YXNldHMiLCAic2Fycy1jb3YyLXN0cnVjdHVyZXMuanNvbiIpKQpgYGAKClsqKkRvd25sb2FkIHJhdyBkYXRhc2V0IGluIEpTT04gZm9ybWF0KipdKGRhdGFzZXRzL3NhcnMtY292Mi1zdHJ1Y3R1cmVzLmpzb24pCg==