• How to re-use this work
  • Nucleosome structures by year
    • Nucleosome structures by year and experimental method
    • Nucleosome structures by year and presence of a binding factor
  • Nucleosome structures by experimental method
  • Nucleosome structures by presence of a binding factor
  • Nucleosome structures by histone species
    • Crystal structures of nucleosomes by histone species
    • Cryo-EM structures of nucleosomes by histone species
  • Resolution of nucleosome structures by histone species
    • Crystal structures
    • Cryo-EM structures
  • Dataset

Last updated on Sun Feb 11 22:55:44 2024.

How to re-use this work

If you use the following figures in your own work, please cite:

# Load required packages
library(magrittr)
library(dplyr)
library(purrr)
library(jsonlite)
library(forcats)
library(ggplot2)
library(plotly)
library(here)
library(stringr)

# PDBe search queries, one per title pattern. Three queries are needed: one
# for uppercase titles, one for lowercase titles, and one for titles that
# contain "NCP" (presumably "nucleosome core particle" -- confirm).
# All three share the same endpoint, status filter and list of returned
# fields, so the URLs are assembled from a single template.
pdb_queries <- local({
    # Wildcard pattern matched against the entry title, keyed by query name
    title_patterns <- c(
        uppercase = "*UCLEOSOM*",
        lowercase = "*ucleosom*",
        ncp = "*NCP*"
    )
    # Fields requested for each entry (the `fl` parameter of the query)
    returned_fields <- paste(
        c("pdb_id", "citation_year", "title", "experimental_method",
          "resolution", "organism_scientific_name", "molecule_name",
          "molecule_type", "number_of_protein_chains"),
        collapse = ","
    )
    query_urls <- paste0(
        "https://www.ebi.ac.uk/pdbe/search/pdb/select",
        "?q=title:", title_patterns, "%20AND%20status:REL",
        "&fl=", returned_fields,
        "&rows=1000000&wt=json"
    )
    # paste0() drops names, so restore the query names explicitly
    setNames(query_urls, names(title_patterns))
})

# PDB entries whose title matches the queries (e.g. contains the word
# "nucleosome") but which do not actually contain a nucleosome. They are
# excluded from the analysis. This list is curated by hand, because there is
# no good way to automate the detection.
non_nucleosome_structures <- c(
    "1hst", "2z2r", "5x7v", "3uv2", "3fs3", "1wg3",
    "1nw3", "5ikf", "3gyw", "3gyv", "1ofc", "2ayu",
    "2iw5", "3hfd", "6uch", "5r4k", "5r4m", "5r4l",
    "5r4o", "5r4g", "5r4h", "5r4j", "5r4i", "5r4n",
    "1bj6", "6qds", "2iwj", "4dvk", "1a6b", "6qdu",
    "6ne8", "1tsu", "1esk", "7c4j"
)

# Helper that pulls the `docs` element out of the `response` element of a
# parsed query result and returns it as a tibble
dig_up_data <- function(query_result) {
    as_tibble(query_result$response$docs)
}

# Helper function to detect the presence of a binding factor in one PDB entry.
#
# Arguments:
#   number_of_protein_chains  number of protein chains in the entry (scalar)
#   title                     title of the entry (scalar string)
#
# Returns TRUE if the entry is considered to contain a binding factor, FALSE
# if not, and NA if either input is missing (the entry cannot be classified).
has_binding_factor_one <- function(number_of_protein_chains, title) {
    # A missing input would otherwise make `if` fail with "missing value
    # where TRUE/FALSE needed"; report the entry as unknown instead.
    if (is.na(number_of_protein_chains) || is.na(title)) {
        return(NA)
    }

    # It takes 8 histone chains to make a nucleosome, so if the number of
    # protein chains is not divisible by 8, this means there is a binding
    # factor, unless we're seeing the overlapping dinucleosome (14 protein
    # chains, but they are all histones... one octamer + one hexamer).
    # That entry is recognized by the word "unusual" in its title.
    is_overlapping_dinucleosome <- str_detect(title, "unusual")
    if (number_of_protein_chains %% 8 != 0 && !is_overlapping_dinucleosome) {
        return(TRUE)
    }

    # But for multiples of 8 chains > 8 (i.e. 16, 24, 32, 40), we can have
    # this number of chains by chance even with binding factors. There is no
    # good way to automatically find these cases, unfortunately, so the known
    # ones are matched by title.
    str_detect(title, "COMPASS") || str_detect(title, "LSD1/CoREST")
}

# Vectorized wrapper around has_binding_factor_one(): walks the chain counts
# and the titles in parallel and returns one logical value per entry
has_binding_factor <- function(number_of_protein_chains_vector, title) {
    map2_lgl(
        .x = number_of_protein_chains_vector,
        .y = title,
        .f = function(chain_count, entry_title) {
            has_binding_factor_one(chain_count, entry_title)
        }
    )
}

# Run every query against the PDB, stack the results into a single table,
# drop the entries known not to contain a nucleosome, convert the columns to
# convenient types, flag the entries that contain a binding factor, and keep
# a single row per PDB entry (the same entry can be returned by several
# queries)
pdb_data <- pdb_queries %>%
    map(function(query_url) dig_up_data(fromJSON(query_url))) %>%
    bind_rows() %>%
    filter(!pdb_id %in% non_nucleosome_structures) %>%
    mutate(
        citation_year       = as.integer(citation_year),
        experimental_method = as_factor(as.character(experimental_method)),
        molecule_name       = as.character(molecule_name),
        molecule_type       = as_factor(as.character(molecule_type)),
        has_binding_factor  = has_binding_factor(number_of_protein_chains, title)
    ) %>%
    distinct(pdb_id, .keep_all = TRUE)

Nucleosome structures by year

All figures are interactive (you can zoom in, and hovering over elements will show more information).

Nucleosome structures by year and experimental method

# Number of PDB entries per citation year, coloured by experimental method
nucleosome_structures_year <- ggplot(data = pdb_data) +
    geom_bar(aes(x = citation_year, fill = experimental_method)) +
    labs(
        title = "Structures of nucleosomes by year",
        x = "Publication year",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Experimental method")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_structures_year)
2000200520102015202020250255075100
Experimental methodX-ray diffractionElectron MicroscopyStructures of nucleosomes by yearPublication yearNumber of PDB entries

Download figure in SVG format

Nucleosome structures by year and presence of a binding factor

# Number of PDB entries per citation year, coloured by whether the entry
# contains a binding factor
nucleosome_structures_year_binding_factor <- ggplot(data = pdb_data) +
    geom_bar(aes(x = citation_year, fill = has_binding_factor)) +
    labs(
        title = "Structures of nucleosomes by year",
        x = "Publication year",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Contains a binding factor")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_structures_year_binding_factor)
2000200520102015202020250255075100
Contains a binding factorFALSETRUEStructures of nucleosomes by yearPublication yearNumber of PDB entries

Download figure in SVG format

Nucleosome structures by experimental method

# Number of PDB entries for each experimental method
nucleosome_structures_method <- ggplot(data = pdb_data) +
    geom_bar(aes(x = experimental_method, fill = experimental_method)) +
    labs(
        title = "Structures of nucleosomes by experimental method",
        x = "",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Experimental method")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_structures_method)
X-ray diffractionElectron Microscopy0100200300400
Experimental methodX-ray diffractionElectron MicroscopyStructures of nucleosomes by experimental methodNumber of PDB entries

Download figure in SVG format

Nucleosome structures by presence of a binding factor

# Number of PDB entries for each experimental method, coloured by whether
# the entry contains a binding factor
nucleosome_binding_factors_methods <- ggplot(data = pdb_data) +
    geom_bar(aes(x = experimental_method, fill = has_binding_factor)) +
    labs(
        title = "Structures of nucleosomes by presence of a binding factor",
        x = "Experimental method",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Presence of a binding factor")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_binding_factors_methods)
X-ray diffractionElectron Microscopy0100200300400
Presence of a binding factorFALSETRUEStructures of nucleosomes by presence of a binding factorExperimental methodNumber of PDB entries

Download figure in SVG format

# Number of PDB entries with and without a binding factor
nucleosome_binding_factors <- ggplot(data = pdb_data) +
    geom_bar(aes(x = has_binding_factor, fill = has_binding_factor)) +
    labs(
        title = "Structures of nucleosomes by presence of a binding factor",
        x = "Presence of a binding factor",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Presence of a binding factor")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_binding_factors)
FALSETRUE0100200300
Presence of a binding factorFALSETRUEStructures of nucleosomes by presence of a binding factorPresence of a binding factorNumber of PDB entries

Download figure in SVG format

# Number of PDB entries with and without a binding factor, coloured by
# experimental method
nucleosome_binding_factors_methods_2 <- ggplot(data = pdb_data) +
    geom_bar(aes(x = has_binding_factor, fill = experimental_method)) +
    labs(
        title = "Structures of nucleosomes by presence of a binding factor",
        x = "Presence of a binding factor",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Experimental method")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_binding_factors_methods_2)
FALSETRUE0100200300
Experimental methodX-ray diffractionElectron MicroscopyStructures of nucleosomes by presence of a binding factorPresence of a binding factorNumber of PDB entries

Download figure in SVG format

Nucleosome structures by histone species

Crystal structures of nucleosomes by histone species

# Number of X-ray diffraction entries per organism, restricted to entries
# whose molecule name mentions a histone
nucleosome_xtal_species <- pdb_data %>%
    filter(experimental_method == "X-ray diffraction") %>%
    mutate(is_histone = str_detect(molecule_name, pattern = "Histone H")) %>%
    filter(is_histone) %>%
    # Convert to a factor only after filtering, so that the levels reflect
    # the entries that are actually plotted
    mutate(
        organism_scientific_name =
            as_factor(as.character(organism_scientific_name))
    ) %>%
    ggplot() +
    geom_bar(
        aes(x = organism_scientific_name, fill = organism_scientific_name)
    ) +
    labs(
        title = "Crystal structures of nucleosomes by histone species",
        x = "",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Histone species")) +
    theme_bw() +
    # Tilt the organism names so that they do not overlap
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Render the static plot as an interactive one
ggplotly(nucleosome_xtal_species)
Xenopus laevisMus musculusHomo sapiensSaccharomyces cerevisiae S288CDrosophila melanogasterArabidopsis thalianaLeishmania major0255075
Histone speciesXenopus laevisMus musculusHomo sapiensSaccharomyces cerevisiae S288CDrosophila melanogasterArabidopsis thalianaLeishmania majorCrystal structures of nucleosomes by histone speciesNumber of PDB entries

Download figure in SVG format

Cryo-EM structures of nucleosomes by histone species

# Number of electron microscopy entries per organism, restricted to entries
# whose molecule name mentions a histone
nucleosome_cryoem_species <- pdb_data %>%
    filter(experimental_method == "Electron Microscopy") %>%
    mutate(is_histone = str_detect(molecule_name, pattern = "Histone H")) %>%
    filter(is_histone) %>%
    # Convert to a factor only after filtering, so that the levels reflect
    # the entries that are actually plotted
    mutate(
        organism_scientific_name =
            as_factor(as.character(organism_scientific_name))
    ) %>%
    ggplot() +
    geom_bar(
        aes(x = organism_scientific_name, fill = organism_scientific_name)
    ) +
    labs(
        title = "Cryo-EM structures of nucleosomes by histone species",
        x = "",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Histone species")) +
    theme_bw() +
    # Tilt the organism names so that they do not overlap
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Render the static plot as an interactive one
ggplotly(nucleosome_cryoem_species)
Drosophila melanogasterXenopus laevisHomo sapiensGallus gallusSaccharomyces cerevisiae S288CTrypanosoma brucei brucei TREU927XenopusMarseillevirus marseillevirusKomagataella pastorisXenopus tropicalisMus musculusGiardia intestinalissynthetic construct050100150
Histone speciesDrosophila melanogasterXenopus laevisHomo sapiensGallus gallusSaccharomyces cerevisiae S288CTrypanosoma brucei brucei TREU927XenopusMarseillevirus marseillevirusKomagataella pastorisXenopus tropicalisMus musculusGiardia intestinalissynthetic constructCryo-EM structures of nucleosomes by histone speciesNumber of PDB entries

Download figure in SVG format

Resolution of nucleosome structures by histone species

Crystal structures

# Summary statistics of the resolution of the X-ray diffraction entries
pdb_data %>%
    dplyr::filter(experimental_method == "X-ray diffraction") %>%
    dplyr::pull(resolution) %>%
    summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.580   2.450   2.773   2.899   3.085   9.700
# Distribution of the resolution of X-ray diffraction entries whose molecule
# name mentions a histone, coloured by organism
nucleosome_xtal_resolution_species <- pdb_data %>%
    filter(
        experimental_method == "X-ray diffraction",
        str_detect(molecule_name, pattern = "Histone H")
    ) %>%
    # Convert to a factor only after filtering, so that the levels reflect
    # the entries that are actually plotted
    mutate(
        organism_scientific_name =
            as_factor(as.character(organism_scientific_name))
    ) %>%
    select(resolution, organism_scientific_name) %>%
    ggplot() +
    geom_histogram(
        mapping = aes(x = resolution, fill = organism_scientific_name),
        binwidth = 0.2
    ) +
    labs(
        title = "Resolution of nucleosome crystal structures by histone species",
        x = "Resolution (Å)",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Histone species")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_xtal_resolution_species)
2.55.07.510.001020
Histone speciesXenopus laevisMus musculusHomo sapiensSaccharomyces cerevisiae S288CDrosophila melanogasterArabidopsis thalianaLeishmania majorResolution of nucleosome crystal structures by histone speciesResolution (Å)Number of PDB entries

Download figure in SVG format

Cryo-EM structures

# Summary statistics of the resolution of the electron microscopy entries
pdb_data %>%
    dplyr::filter(experimental_method == "Electron Microscopy") %>%
    dplyr::pull(resolution) %>%
    summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.890   3.200   3.700   4.219   4.312  20.000       2
# Distribution of the resolution of electron microscopy entries whose
# molecule name mentions a histone, coloured by organism
nucleosome_cryoem_resolution_species <- pdb_data %>%
    filter(
        experimental_method == "Electron Microscopy",
        str_detect(molecule_name, pattern = "Histone H")
    ) %>%
    # Convert to a factor only after filtering, so that the levels reflect
    # the entries that are actually plotted
    mutate(
        organism_scientific_name =
            as_factor(as.character(organism_scientific_name))
    ) %>%
    select(resolution, organism_scientific_name) %>%
    ggplot() +
    geom_histogram(
        mapping = aes(x = resolution, fill = organism_scientific_name),
        binwidth = 0.2
    ) +
    labs(
        title = "Resolution of nucleosome cryo-EM structures by histone species",
        x = "Resolution (Å)",
        y = "Number of PDB entries"
    ) +
    guides(fill = guide_legend(title = "Histone species")) +
    theme_bw()
# Render the static plot as an interactive one
ggplotly(nucleosome_cryoem_resolution_species)
510150102030
Histone speciesDrosophila melanogasterXenopus laevisHomo sapiensGallus gallusSaccharomyces cerevisiae S288CTrypanosoma brucei brucei TREU927XenopusMarseillevirus marseillevirusKomagataella pastorisXenopus tropicalisMus musculusGiardia intestinalissynthetic constructResolution of nucleosome cryo-EM structures by histone speciesResolution (Å)Number of PDB entries

Download figure in SVG format

Dataset

The graphs presented above are derived from the following dataset:

# Format table for display
pdb_table <- pdb_data %>% 
    arrange(desc(citation_year)) %>% 
    select(`PDB code` = pdb_id,
           `Citation year` = citation_year,
           `Experimental method` = experimental_method,
           Title = title)
pdb_table
ABCDEFGHIJ0123456789
PDB code
<chr>
Citation year
<int>
Experimental method
<fct>
8u142024Electron Microscopy
8upf2024Electron Microscopy
8sn82024Electron Microscopy
8txx2024Electron Microscopy
8syp2024Electron Microscopy
8sn12024Electron Microscopy
8hr12024Electron Microscopy
8sn52024Electron Microscopy
8smw2024Electron Microscopy
8hqy2024Electron Microscopy

Download raw dataset in JSON format

