ggplot2 pollen diagram (and dendrogram)

In R, excellent packages provide ways to plot pollen or more generally stratigraphic diagrams. The two main tools come from the rioja package with “strat.plot” and the analogue package with “Stratiplot”. Althought those two functions are very comprehensive (you can include a dendrogram, pollen zones, etc.), easy to use, and highly customizable; I was still wondering if there is a way in R to plot a simple pollen diagram using only general plot syntax an preferably ggplot2. I came up with this simple solution that involve only ggplot2 syntax. Probably there is a possibility to add a dendrogram with grid.arrange and to overlay additionnal information but right now this approach seems sufficient to cover my very basic plotting needs:

library(ggplot2)
library(grid)
library(neotoma)
library(analogue)
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-1
## analogue version 0.17-0

First extract data from Neotoma using neotoma package, we will have a look at “Le Grand Etang de Suze-La-Rousse” in Provençal Drome by: “Argant, J. 1990. Climat et environnement au Quaternaire dans le bassin du Rhône d’après les données palynologiques. Documents du Laboratoire de Géologie de Lyon 3:1-199.”

suze <- get_site(sitename = 'Le Grand Etang%')
## The API call was successful, you have returned  1 records.
suze_pollen=get_dataset(suze)
suze_data=get_download(suze_pollen)
## API call was successful. Returned record for Le Grand Etang deSuze-La-Rousse
## API call was successful. Returned record for Le Grand Etang deSuze-La-Rousse
## The dataset ID 8962 is associated with a geochronology object, not count data.
## Warning in get_download.default(datasetid, verbose = verbose): Some datasetids returned empty downloads, be aware that length(datasetid) is longer than the download_list.
# head(suze_data[[1]]$counts)

core.pct <- data.frame(tran(suze_data[[1]]$counts, method = 'percent'))

age <- suze_data[[1]]$sample.meta$age
core.pct <- chooseTaxa(core.pct, max.abun = 10) 

Stratiplot(age ~ ., core.pct, sort = 'wa', type = 'poly',
           ylab ='Years Before Present')

Now we create a simple data.frame with information for plotting a pollen diagram in ggplot2:

df=data.frame(yr=rep(age,ncol(core.pct)),
  per=as.vector(as.matrix(core.pct)),
  taxa=as.factor(rep(colnames(core.pct),each=nrow(core.pct))))

We define a theme without gridlines, etc. and elements that will decrease readability of the diagram

theme_new <- theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # remove grids
                panel.background = element_blank(), axis.line = element_line(colour = "black"),
                strip.text.x = element_text(size=10, angle=90, vjust=0), # Taxa names 
                strip.background = element_blank(),
                strip.text.y = element_text(angle = 0),
                legend.position="none",panel.border = element_blank(),
                axis.text.x=element_text(angle=45,hjust=1)) # Axis tick label angle

And finally the pollen diagram:

ggplot(df)+
  geom_line(aes(yr,per))+
  geom_area(aes(yr,per))+
  scale_x_reverse(breaks =seq(0,100000,1000))+
  scale_y_continuous(breaks =seq(0,100,10))+
  xlab("Age (cal. BP)")+ylab("%")+
  coord_flip()+
  theme_new+
  facet_grid(~df$taxa,scales = "free", space = "free")

Add an exageration line (the quick and dirty way):

df$exag=df$per*10
id=unique(df$taxa)
for(i in 1:length(id)){
  uplim=which(df$exag[df$taxa==id[i]]>max(df$per[df$taxa==id[i]]))
  df$exag[df$taxa==id[i]][uplim]<-max(df$per[df$taxa==id[i]])
} 

ggplot(df)+
  geom_line(aes(yr,per))+
  geom_area(aes(yr,exag,fill=taxa))+
  geom_area(aes(yr,per))+
  scale_x_reverse(breaks =seq(0,1e8,1000))+
  scale_y_continuous(breaks =seq(0,100,10))+
  xlab("Age (cal. BP)")+ylab("%")+
  coord_flip()+
  theme_new+
  facet_grid(~df$taxa,scales = "free", space = "free")

 

Because only ggplot2 syntax is used you can play around and add geom_point, geom_bar layouts etc. and customize your diagram the way you want with the theme() function.

UPDATE: Adding a dendrogram is not straightforward but definitively doable. We will use a combinason of rioja and ggdendro package such as:

diss <- dist(sqrt(core.pct/100)^2)
clust <- chclust(diss, method="coniss")
# bstick(clust) # optionnally look at number of optimal zones

Now we will use ggdendro and modify its output in order to inject ages that will replace sample numbers:

dendro <- as.dendrogram(clust)
ddata <- dendro_data(dendro, type="rectangle")

## MOD the segment data frame for age instead of value order -----------------------
ddata$segments->yo
yo$xx=NA
yo$xxend=NA

for(i in 1:length(age)){
  yo$xx[which(yo$x == i)]=age[i]
  yo$xxend[which(yo$xend == i)]=age[i]
  da=age[i+1]-age[i]
  yo$xx[yo$x>i & yo$x<i+1]<-age[i]+(yo$x[yo$x>i & yo$x<i+1]-i)*da
  yo$xxend[yo$xend>i & yo$xend<i+1]<-age[i]+(yo$xend[yo$xend>i & yo$xend<i+1]-i)*da
} # Please dont ask....
ddata$segments$x<-yo$xx
ddata$segments$xend<-yo$xxend

# -----------------------------------------------------------------------------------

We finally can plot the dendrogram along with the diagram with gridExtra

dendro=ggplot(segment(ddata)) +
  geom_segment(aes(x=x, y=y, xend=xend, yend=yend)) +
  coord_flip(xlim=c(3000,14000)) +
  ylab("Distance")+
  scale_x_reverse(breaks =seq(0,1e8,1000))+
  theme_new+theme(axis.title.y=element_blank(),
                  axis.text.y=element_blank(),
                  axis.ticks.y=element_blank(),
                  axis.line.y = element_blank())


g1 = ggplotGrob(diag)
g2 = ggplotGrob(dendro)
g2$heights[5:7]<-g1$heights[6:8] # Important to align plots!
grid.arrange(g1, g2, widths =c(1,0.3),ncol=2,newpage = TRUE)

You can follow the overall logic to add any type of additional plot as long as its a ggplot2 object!

paleofire 1.1.9 !

A new version of the R package paleofire 1.1.9 is available on CRAN (https://CRAN.R-project.org/package=paleofire) and GitHub (https://github.com/oblarquez/paleofire). The release includes few bug fixes and improvements of existing functions, including more flexibility in pfAddData function and new options in kdffreq, see examples below:

library(paleofire)
# Control the type and format of user defined charcoal files
# here for examples csv files with three columns 
#(depth, age and char) separated with semicolon and with "." 
# character used in the file for decimal points:

head(read.csv("http://blarquez.com/public/data/Ben_area.csv"
              ,sep=";"))
files=c("http://blarquez.com/public/data/Ben_area.csv",
 "http://blarquez.com/public/data/Small_area.csv")
mydata=pfAddData(files=files, sep=";", dec=".")
# Transform and compositing:
TR=pfTransform(add=mydata, method=c("MinMax","Box-Cox","Z-Score"),
 BasePeriod=c(200,2000))
COMP=pfComposite(TR1, bins=seq(0,8000,500))
plot(COMP)

screen-shot-2016-10-13-at-9-44-55-am

 # Estimate the frequency of armed conflicts from 1946 to 2014 
 # using kernel density estimation from kdffreq
 # Data from the The Uppsala Conflict Data Program (UCDP) available at: https://www.prio.org

 dat=read.csv('http://ucdp.uu.se/downloads/ucdpprio/ucdp-prio-acd-4-2016.csv')
 res=kdffreq(dat$Year,bandwidth = "bw.ucv", nbboot=1000, up = 1946, lo = 2014, interval=1, pseudo=T)
 plot(res, ylab="# armed conflict/year")

screen-shot-2016-10-13-at-10-03-22-am

Data: Tree biomass reconstruction shows no lag in post-glacial afforestation of eastern Canada

Data from: Blarquez O. and J. Aleman. Tree biomass reconstruction shows no lag in post-glacial afforestation of eastern Canada. Canadian Journal of Forest Research. DOI: 10.1139/cjfr-2015-0201

You can use the application below for viewing and downloading specific time slice data. The shiny application is also available here, all data are zipped here. Simply decompress the archive, then in R use for example:

[code language=”R”]
load(‘acer_50km_data_8000BP.rda’)
# plot the raster object
plot(gr$raster)
# view data
head(gr$df)
# check projection
gr$proj4
# gr is an object of the class "pfGridding"
# from the paleofire package and can be manipulated
# and modified using the plot function:
library(paleofire)
?plot.pfGridding
p=plot(gr,continuous=F,col_class=c(0,5,10,15),
cpal = "Purples", anomalies=FALSE, points=TRUE,
empty_space = 5)
p
[/code]

 

paleofire 1.1.5

A new version of the R package paleofire is available on CRAN at http://cran.r-project.org/web/packages/paleofire/index.html

This release includes a new vignette on the basic usage of paleofire for reconstructing regional charcoal composite curves and a new function to easily export GCD sites to Google Earth .kml file.

Screen Shot 2014-12-01 at 3.28.06 PM

The australasian sites above have been exported using:

[code language=”R”]
library(paleofire)
x=pfSiteSel(id_region=="AUST")
require(sp)
pfToKml(x, file=’/Users/Olivier/Desktop/sites.kml’)
[/code]

 

 

paleofire package highlighted in the PAGES Magazine

The paleofire package has been highlighted in the April issue of the PAGES Magazine :

PAGES_April2014Vannière B., O. Blarquez, J. Marlon, A.-L. Daniau and M. Power. 2014. Multi-Scale Analyses of fire-climate- Vegetation Interactions on Millennial Scales. PAGES Magazine, Volume 22, Page 40.

The article presents the workshop of the Global Paleofire Working Group held in Frasne (France) on 2-6 October 2013 that was supported by PAGES. The pdf of the article can be downloaded here or directly from the PAGES website.

Introducing the paleofire package

The new R package, paleofire, has been released on CRAN. The package is dedicated to the analysis and synthesis of charcoal series contained in the Global Charcoal Database (GCD) to reconstruct past biomass burning.

paleofire is an initiative of the Global Paleofire Working Group core team, whose aim is to encourage the use of sedimentary charcoal series to develop regional to global synthesis of paleofire activity, and to ease access to the GCD data through a common research framework that will be useful to the paleofire research community. Currently, paleofire features are organized into three different parts related to (i) site selection and charcoal series extraction from GCD data; (ii) charcoal data transformation and homogenization; and (iii) charcoal series compositing and syntheses.

Installation:

paleofire is available from both CRAN website and GitHub

To install the official release (v1.0, 01/2014) from CRAN you just need to type this line at the R prompt:

[code language=”R”] install.packages("paleofire")[/code]

To install the development version of paleofire from the GitHub repository the devtools package is required: on Windows platform the Rtools.exe program is required in order to install the devtools package. Rtools.exe can be downloaded for a specific R version on http://cran.r-project.org/bin/windows/Rtools/

Once devtools is installed type the following lines at R prompt to install paleofire:

[code language=”R”]
library(devtools)
install_github("paleofire","paleofire")
library(paleofire)
[/code]

To test everything is working you can plot a map of all charcoal records included in the Global Charcoal Database v03:

[code language=”R”]
plot(pfSiteSel())
[/code]

For details and examples about paleofire please refer to the included manual.

More examples of the capabilities of paleofire to come in the near future…

Calculate pixel values at increasing radius

ndvi_classes.m function could be used to calculate the number and percentage of pixels at increasing radius from a lake following certain criterion’s. The function was originally developed to count the number of pixels falling in specific NDVI classes around lakes but may be useful for different proxies and situations. The function use georeferenced tiff images as input. For details please refer to Aleman et al. (2013) or to the included help. The image used in the example can be downloaded here.

The function:

[code language=”matlab”]
function [classe,classe_per]=ndvi_classes(ImageName,critere1,critere2)

% INPUT: ImageName is the name of the geotiff image for imput
% critere1 (and 2) criterion for classes evaluation
%
% OUTPUT: classe: vector of NDVI classes at increasing radius from
% the lake center (number of pixel falling in class)
% classe_per: Same as classe but in percentage of pixels
% EXAMPLE: Using the given Image ‘classif_GBL_94.tif’
%{
[classe classe_per]=ndvi_classes(‘classif_GBL_94.tif’,’>2′,'<4′);
%Plot the classe as a function of distance in km
%(1pixel=0.0301km, input 200 pixels when asked)
plot((1:200)*0.0301,classe_per,’k-‘)
ylabel(‘% pixels >2 and <4’)
xlabel(‘Distance from lake center (km)’)
%}
% Blarquez Olivier 2012
% blarquez@gmail.com

[Imge,dat] = geotiffread(ImageName);

imagesc(double(Imge))

select= input(‘Select point interactively? Y/N: ‘, ‘s’);
if select==’Y’
title(‘Please double clic on lake center:’)
p = impoint(gca,[]);
p = wait(p);
p=round(p);
elseif select==’N’
valstring = input(‘Enter latitude and longitude: ‘, ‘s’);
valparts = regexp(valstring, ‘[ ,]’, ‘split’);
coords = str2double(valparts);
[~,p(2)]=min(abs(linspace(dat.Latlim(1),dat.Latlim(2),dat.RasterSize(1))-…
coords(1)));
[~,p(1)]=min(abs(linspace(dat.Lonlim(1),dat.Lonlim(2),dat.RasterSize(2))-…
coords(2)));
end

radius = input(‘Radius in pixels to be evaluated: ‘);
Imge(:,((p(1)+(radius)):end))=[];
Imge(:, 1:(p(1)-(radius)))=[];
Imge(((p(2)+(radius)):end),:)=[];
Imge(1:(p(2)-(radius)),:)=[];

n=length(Imge);

classe=zeros(radius,1);
classe_per=zeros(radius,1);
for r=1:radius
warning(‘off’,’all’)
imshow(Imge)
%imagesc(double(Imge))

hEllipse = imellipse(gca,[(n/2-(r)) n/2-(r) r*2 r*2]);
maskImage = hEllipse.createMask();
imshow(maskImage);

maskedImge = Imge .* cast(maskImage,class(Imge));
%imshow(maskedImge);
maskedImge(maskedImge==0)=NaN;
pixList=reshape(maskedImge,[],1);
pixList(isnan(pixList),:)=[];

classe(r,1)=length(pixList(eval([‘pixList’,critere1]) & …
eval([‘pixList’,critere2]),:));
classe_per(r,1)=classe(r,1)./length(pixList(:,1));
end
close all
warning(‘on’,’all’)
end
[/code]

Paleofire reconstruction based on an ensemble-member strategy

Matlab codes and data associated with the manuscript: Blarquez O., M. P. Girardin, B. Leys, A. A. Ali, J. C. Aleman, Y. Bergeron and C. Carcaillet. 2013. Paleofire reconstruction based on an ensemble-member strategy applied to sedimentary charcoal. Geophysical Research Letters 40: 2667–2672, are available here.

Preprocessed data for Lac à la Pessière and Lago perso used in the paper also available:
PESSIERE.mat.zip
PERSO.mat.zip