gRaphics!: May 2012

Monday, May 28, 2012

Again on polar/star/pie charts

Haven't had much time to devote to new visualisations, mostly because work and baby have taken the precedence.

But I just wanted to take a few minutes to share the latest version of the script I showed last time.

It now saves to a pdf file, but that's not the largest change. I actually included one more slice which is obtained from the others with a formula (e.g. average, or geometrical mean, in this case the sum) and is used to rank the pies accordingly. The new value is represented as a white wedge on top of everything else, with its value pasted over it. The script wraps compounds and fit only a limited number per page, then moves on to the next one

There are a lot of things which may be changed, such as having the value as a bubble at the centre rather than a new wedge, to conserve the pies' proportionality... Or just showing the 'score' in a corner of the plot, with other info at the bottom... None of this is implemented as of yet, sorry. The code commented-out shows some graphics alternatives which I did try and set aside for the moment.
Code follows:

	require(ggplot2)
	require(reshape)
	# windows()
	getRandString<-function(len=12) return(paste(sample(c(LETTERS,letters),len,replace=TRUE),collapse=''))

	cpd_name_len<-8
	descr_name_len<-3
	# let's create a dummy set
	nvars<-4; varnames <- as.character(lapply(X=rep(descr_name_len,times=nvars), FUN=getRandString)) # the number and names of the variables
	ncpd<-25; cpd_x_row<-5; rows_x_page<-6; cpd_x_page<-cpd_x_row*rows_x_page
	cpdnames <- as.character(lapply(X=rep(cpd_name_len,times=ncpd), FUN=getRandString)) # the number and name of item (in my case compounds)
	facet_font_size<-if(max(length(cpdnames)) < 12) 8 else 4
	# a matrix filled with pseudorandom gibberish
	MyMatrx<-matrix(ncol=nvars,nrow=ncpd,data=sample(5, repl=T, size=ncpd*nvars))
	rownames(MyMatrx)<-cpdnames; colnames(MyMatrx)<-varnames
	# Reorder the matrix by sum of columns - in an attempt of plotting first 'full' pies, then emptier ones
	# MyMatrx<-MyMatrx[rev(order(rowSums(MyMatrx))),] # this works, but plotting order is unaltered...
	MyMatrx<-cbind(A00=exp(apply(apply(MyMatrx,c(1,2),log),1,mean)), MyMatrx) # dding the sum in...
	MyMatrx<-MyMatrx[rev(order(MyMatrx[,'A00'])),] # this works, but plotting order is unaltered...
	# now melt your dataframe so as to be amenable to plotting as bargraph (of which piechart are but a subset)
	DF <- melt(MyMatrx, varnames=c('cpd','variable'))
	DF$variable<-relevel(DF$variable, 'A00') # reorders the levels so that A00 is first
	DF$main<-'component'; DF$main[DF$variable=="A00"]<-'main';
	DF$cpd <- factor(DF$cpd, levels=row.names(MyMatrx))
	DF<-DF[order(DF$cpd),] # reordering in the hope that it will keep all of a compound records together
	# # let's now print out a series of Vlaaivis, faceted according to each compound - that is, one Vlaaivis x compound.
	# p1<-ggplot(DF, aes(factor(variable), value, fill = factor(variable))) + geom_bar(width = 1, alpha=0.5) + scale_y_continuous(breaks = 0:10) + coord_polar() + labs(x = "", y = "") + opts(legend.position = "none", axis.text.y = theme_blank(), axis.ticks = theme_blank()) + facet_wrap( ~ cpd, ncol = 5)
	# # It works!!! (Albeit not perfectly)
	# pdf(sprintf("%s.%s","C:/Users/LucaF/Documents/My Dropbox/Vlaaivis",'pdf'), onefile=T, width=8,height=12, useDingbats=F);
	# print(p1);
	# # savePlot("C:/Users/LucaF/Documents/My Dropbox/Vlaaivis.png",type='png')
	# dev.off()

	pdf(sprintf("%s.%s",file.path(Sys.getenv("USERPROFILE"),"Documents/My Dropbox/Vlaaivis"),'pdf'), onefile=T, width=8,height=12, useDingbats=F);
	cpd_starts<-seq(1,ncpd,by=cpd_x_page)
	for (pagina in 1:(ceiling(ncpd/cpd_x_page))) {
	inizio<-min(nrow(DF),cpd_starts[pagina]*(nvars+1)-((nvars+1)-1));
	fine<-min(nrow(DF),cpd_starts[pagina](nvars+1)+(nvars+1)cpd_x_page-(nvars+1))
	sliceseq<-(inizio):(fine); sliceseq<-sliceseq[which(sliceseq<=nrow(DF))]
	print(Slice<-DF[sliceseq,])
	# let's now print out a series of Vlaaivis, faceted according to each compound - that is, one Vlaaivis x compound.
	# version 1: one wedge, almost touching, black outline, opaque filling...
	# p1<-ggplot(Slice, aes(factor(variable), sqrt(value), fill = factor(variable))) + geom_bar(width = .95, alpha=1, col='black') + scale_y_continuous(breaks = 0:10) + coord_polar() + labs(x = "", y = "") + opts(legend.position = "top", axis.text.x = theme_blank(), axis.text.y = theme_blank(), axis.ticks = theme_blank()) + facet_wrap( ~ cpd, ncol = cpd_x_row)
	# version 2: a bit fuzzied up by different bin widths...
	# p1<-ggplot(Slice, aes(factor(variable), sqrt(value), fill = factor(variable))) + geom_bar(width = jitter(rep(.9, 5), factor=10), alpha=.3) + scale_y_continuous(breaks = 0:10) + coord_polar() + labs(x = "", y = "") + opts(legend.position = "none", axis.text.y = theme_blank(), axis.ticks = theme_blank()) + facet_wrap( ~ cpd, ncol = cpd_x_row)
	# version 3: fuzzied up both on the x and the y axes
	# p1<-ggplot(Slice)
	# for (n in 1:10) {
	# p1<-p1 + geom_bar(aes(factor(variable), sqrt(jitter(value, factor=10)), fill = factor(variable)), width = jitter(rep(.9, 10), factor=10), alpha=.01)
	# }
	# p1<-p1 + scale_y_continuous(breaks = 0:10) + coord_polar() + labs(x = "", y = "") + opts(legend.position = "top", axis.text.x = theme_blank(), axis.text.y = theme_blank(), axis.ticks = theme_blank()) + facet_wrap( ~ cpd, ncol = cpd_x_row)
	# version 4: not sure yet
	Slice_main<-Slice[Slice$main=='main',]
	Slice_othr<-Slice[Slice$main!='main',]
	p1<-ggplot()
	for (n in 1:2) {
	if (n==1) p1<-p1 + geom_bar(data=Slice_main, aes(factor(variable), value), width = 1, alpha=1, fill = 'white', col='black') + geom_text(data=Slice_main, aes(x=factor(variable), y=2*sqrt(value)/3, label=round((value))), size=3)
	if (n!=1) p1<-p1 + geom_bar(data=Slice_othr, aes(factor(variable), (value), fill = factor(variable)), col='black', lwd=0.1, width = 1, alpha=.5)
	}
	p1<-p1 + scale_y_sqrt(limits=c(0,max(DF$value))) + labs(x = "", y = "") + opts(legend.position = "top", axis.text.y = theme_blank(), axis.text.x = theme_blank(), axis.ticks = theme_blank()) + coord_polar(start=-pi/(nvars+1))
	#p1<-p1 + facet_grid(cpd ~ main)
	p1<-p1 + facet_wrap(~ cpd, ncol=cpd_x_row) + opts(strip.text.x = theme_text(size = facet_font_size))
	# It works!!! (Albeit not perfectly)
	print(p1);
	}
	dev.off()

view raw gistfile1.r hosted with ❤ by GitHub

Wednesday, May 16, 2012

My take on polar bar (a.k.a. consultant's) charts

Once upon a time, when I was working at Johnson & Johnson (pharma branch), I was surrounded by a bunch of programmers working to develop (among other things) a nifty piece of software for internal use. Part of it was later released as freeware, called Vlaaivis. The main idea was to visualize each the compound's many data at once, with each property being represented by a slice within the pie. For each property, ti was possible to define an ideal value, or range, and values above or below that one would show up as incomplete slices of two shades of the same color... This way, the fuller the pie the better, or more 'dieal', the compound under exam would be.
You may find it still at its home, http://www.vlaaivis.com/.

As I moved on to different things, I still remembered this as a good way of visualising multifactorial data, especially when comparing several candidates (compounds, in my case). Inspired by a post on LearnR, I decided to start reimplementing something similar in R for use within our own group.

The most significant change from the LearnR post is that I added in a 'facets_grid()' call to the ggplot, so as to split the polar bar chart in the different compounds. Thanks to the faceting, all compounds are plotted on the same 'space' and are therefore immediately comparable.

The other, minor, change I made was to create the dataframe as a matrix, which is the way we usually store such kind of data, and use 'melt', from the package 'reshape', to convert it into a form suitable for plotting as bar chart (polar or not). I left in the comments an option for reading in the data from a text file...

Here's the dummy template I came up with:

	# My take on multi-variable pie charts. Inspired by:
	# http://learnr.wordpress.com/2010/08/16/consultants-chart-in-ggplot2/
	require(ggplot2)
	require(reshape)
	# let's create a dummy set
	nvars<-3; varnames <- letters[1:nvars] # the number and names of the variables
	ncpd<-16; cpdnames <- toupper(letters[1:ncpd]) # the number and name of item (in my case compounds)# a matrix filled with pseudorandom gibberish
	MyMatrx<-matrix(ncol=nvars,nrow=ncpd,data=sample(5, repl=T, size=ncpd*nvars))
	rownames(MyMatrx)<-cpdnames; colnames(MyMatrx)<-varnames
	# alternatively, one could read such a data structure from a file...
	# MyMatrx<-read.table(file="khuuhhl.uyg")
	# Reorder the matrix by sum of columns - in an attempt of plotting first 'full' pies, then emptier ones
	MyMatrx<-MyMatrx[order(rowSums(MyMatrx)),] # this works, but ploting order is unaltered...
	# now melt your dataframe so as to be amenable to plotting as bargraph (of which piechart are but a subset)
	DF <- melt(MyMatrx, varnames=c('cpd','variable'))
	DF$cpd <- factor(DF$cpd, levels=rev(row.names(MyMatrx))) # this sets the order for the facet plotting
	# let's now print out a series of Vlaaivis, faceted according to each compound - that is, one Vlaaivis x compound.
	ggplot(DF, aes(factor(variable), value, fill = factor(variable)), color='black') + geom_bar(width = 1, alpha=0.5) + scale_y_continuous(breaks = 0:10) + coord_polar() + labs(x = "", y = "") + opts(legend.position = "none", axis.text.y = theme_blank(), axis.ticks = theme_blank()) + facet_wrap( ~ cpd) # facet_wrap, not grid
	# It works!!! (Albeit not perfectly)

view raw gistfile1.r hosted with ❤ by GitHub

And here's my output:

If, as I described previously, the bars were some kind of normalised score, such as the recently suggested druglikeness score, then the fuller the pie, the better looking the compound would be for a medicinal chemist.

I omitted the legend, since the variable names (a-e) is present in each plot (~~does anyone know how can I get rid of the 0.5 legend key? it comes from the alpha definition in the ggplot~~).

Two major things left to do:

I would like to plot the compound in order of 'fullness' - a sort/order snippet is there in the code, and the new ordering survives the melting of the matrix - however, ggplot seems to rearrange the data according to some internal order... (Thanks to Christoph for fixing this)
Right now, the code isn't suitable for too many compounds, since the facets_grid() will arrange them horizontally. I would be grateful if someone were to let me know how to automatically arrange them in a grid of a given maximum number of columns... I know how to do that when I explicitly create each plot, but then I loose the ease of comparison which comes from all compounds being plotted on the same scale... ( Thanks to Christoph for fixing this too)

I'll update this text and the code as I improve the visualization.

If you don't feel like messing around with code, you can always try and build a similar plot using deduceR:
http://www.r-statistics.com/2010/08/rose-plot-using-deducers-ggplot2-plot-builder/

Hope one or more of you find this useful!

Monday, May 14, 2012

Plotting data and distribution simultaneously (with ggplot2)

Ever wanted to see at a glance the distribution of your data across different axes? It happens often to me, and R allows to build a nice plot composition - This is my latest concoction. I used ggplot2 here, but equivalent graphics can be made using either base graphics, or lattice.

The set is the usual 'iris', the central plot has petal length and width along the X/Y axes - I used a customised color palette so as to be friendlier to color-blind people. On the left and at the top of the main plot, the density distribution of the whole set (grey) and by subspecies.

	library(ggplot2);
	library(grid);

	data(iris)

	x <- jitter(iris[,c('Sepal.Length')])
	y <- jitter(iris[,c('Sepal.Width')])
	z <- factor(iris[,c('Species')])

	# The color blind palette without black:
	cbnbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

	vplayout <- function(x, y)
	viewport(layout.pos.row = x, layout.pos.col = y)

	df<-data.frame(x,y,z) # just create a dataframe - x y and z are easier to write than Petal.length and so on
	# I now define the (gg)plots
	# old p1 and p2 (..density.. plots)...
	# p1<-ggplot(df) + scale_fill_manual(values=cbnbPalette) + geom_density(aes(x = x, y = -..density..), col='black', fill="#CCCCCC") + geom_density(aes(x = x, y = ..density.., fill = subsp, alpha=0.4)) +theme_invisible() + opts(legend.position = "none")
	# p2<-ggplot(df) + scale_fill_manual(values=cbnbPalette) + geom_density(aes(x = y, y = -..density..), col='black', fill="#CCCCCC") + geom_density(aes(x = y, y = ..density.., fill = subsp, alpha=0.4)) +theme_invisible() + opts(legend.position = "none") +coord_flip()
	# now susbstituted by ..count.. plots as suggested by Andrew in comments
	p1<-ggplot(df) # based on the dataframe just defined
	+ scale_fill_manual(values=cbnbPalette) # using the colorblind-friendly palette
	+ geom_density(aes(x = x, y = -..count.., col=subsp), fill="#CCCCCCCC", position = "stack") # overall density plot - plotted on the negative
	+ geom_density(aes(x = x, y = ..count.., fill = subsp, alpha=0.4)) # so as to be specular to the densities by subspecies
	+ theme_invisible() # oh yeah, I don't want any other graphical element to crowd this plot - the x axys is the same as in the main plot
	+ opts(legend.position = "none")
	# this is a second density plot, oriented vertically (hence the 'coord_flip()' at the end
	p2<-ggplot(df) + scale_fill_manual(values=cbnbPalette) + geom_density(aes(x = y, y = -..count.., col=subsp), fill="#CCCCCCCC", position = "stack") + geom_density(aes(x = y, y = ..count.., fill = subsp, alpha=0.4)) +theme_invisible() + opts(legend.position = "none") +coord_flip()
	#finally the main x/y plot - nothing to write home about
	p3<- ggplot(df) + scale_colour_manual(values=cbbPalette) + geom_point(aes(x = x, y = y, col=subsp)) + opts(legend.position = c(1.2,1.2))
	#now let's print the plot to screen!
	grid.newpage()
	pushViewport(viewport(layout = grid.layout(5, 5))) # a 5 by 5 grid
	print(p1, vp=vplayout(1,1:4)) # the first density plot will occupy the top of the grid
	print(p3, vp=vplayout(2:5,1:4)) # the main x/y plot will instead spread across most of the grid
	print(p2, vp=vplayout(2:5,5)) # with the second density plot occupying a narrow vertical strip at the right
	# done! Enjoy!

view raw gistfile1.r hosted with ❤ by GitHub

Well, I hope the code is clear - this time I commented it a bit more...

Saturday, May 12, 2012

My own version of bubble plot (part 1)

During one of my projects, I found myself in need of visualizing more than 3 dimensions at once. Three-dimensional graphs are not a good solution, usually - they will need to be properly oriented, for a start, ad that's tricky.

So, I started looking at bubble plots. The size of the bubble can show one property, as illustrated by the nice post at FlowingData - then you can show one more property defined by a color scale (continuous below, but nothing stops it from being categorical)

I decided to push it and have two properties: look at the example below - the redder the color, the higher the value on the property ApKUpt (or whatever you want). The greener, the higher ApVUpt. I moved the color legend to a square on the extreme right to achieve a better use of the available space.

I tried three colors but it turns out that it just doesn't work. Even when your eyes don't interpret every rgb triplet as a completely different color, the amount of redness, greeness or blueness is difficult to estimate. Also, it gets tricky to show the color grading in a legend... One has to resort to slices of the three-dimensional color space. See what I mean?

Of course, one can define an ad-hoc color scale, such as the one used below, vaguely inspired by the colors that Mathematica uses to paint its surfaces. Many thanks to my colleague Pär for teaching me how to define these kind of color scales, and much else.

Here follows the code for the one, two and three colors plot:

# bubble plot - takes in a dataframe with five different properties, for X, Y, Size, ColorA, B and C, plus additional parameters to control appearances of the plots produced

bubbles_in_2colors<-function( dataset, to_jitter=F, prop_names=NULL, titles=list(main='X vs Y', color='colors', size='size'), min_size = 1, max_size = 5, max_sat = 200, fav_col='green', alpha_default=0.7, ...) {

# NB: this code can handle a third primary color, but human eyes can't so for the moment I've forbidden that option.

if(exists('dataset')) {

if (is.null(prop_names)) {print(prop_names<-names(dataset))};

if (length(prop_names) ==4) {propX<-dataset[,1]; propY<-dataset[,2]; propS<-dataset[,3]; propCA<-dataset[,4]}

else if (length(prop_names) ==5) {propX<-dataset[,1]; propY<-dataset[,2]; propS<-dataset[,3]; propCA<-dataset[,4]; propCB<-dataset[,5]}

else if (length(prop_names) ==6) {propX<-dataset[,1]; propY<-dataset[,2]; propS<-dataset[,3]; propCA<-dataset[,4]; propCB<-dataset[,5]; propCC<-dataset[,6]; print("Three colors are too many for measly human eyes")}

else {stop(sprintf("I don't know what to do with an array with %d properties",length(prop_names)))}

}else {stop('did you actually give me a dataset to work with?')}

rangepropX<-range(propX); log_x=log_it(rangepropX); if (!exists('xlab')) {xlab=prop_names[1]}

rangepropY<-range(propY); log_y=log_it(rangepropY); if (!exists('ylab')) {ylab=prop_names[2]}

rangepropS<-range(propS); log_s=log_it(rangepropS);

# the line below restricts the sizes in case the span of values isn't too spread out.

Max2min<- rangepropS[2]/rangepropS[1]; if ((!log_s) & (Max2min<5)) {min_size=2; max_size=min_size*Max2min}

log_xy=paste(if(log_x){'x'}else{''},if(log_y){'y'}else{''},sep="");

rangepropCA<-range(propCA); log_cA=log_it(rangepropCA);

if (exists('propCB')) {rangepropCB<-range(propCB); log_cB=log_it(rangepropCB);} else{rangepropCB<-c(0,0); log_cB=F}

if (exists('propCC')) {rangepropCC<-range(propCC); log_cC=log_it(rangepropCC);} else{rangepropCC<-c(0,0); log_cC=F}

pch <- if (!exists('pch')) {21} else {pch} # 21 for circles, 22 for squares, 23 for diamonds, 24 for triangles up, 25 for triangles down

# the color palette is obtained by spreading the known palette (log)proportionally across the range of propC

# how about setting the color directly in rgb space?

if (!exists('palette_l')) {palette_l<-50}

colorpalette<-0:palette_l*(max_sat/255)/palette_l

print(colorpalette)

if(fav_col=='red') {

r_colpaltt<- colorpalette[1+invprop(propCA,rangepropCA[1],rangepropCA[2],log_cA)*(palette_l-1)]

g_colpaltt<- if (exists('propCB')) colorpalette[1+invprop(propCB,rangepropCB[1],rangepropCB[2],log_cB)*(palette_l-1)] else 0

b_colpaltt<- if (exists('propCC')) colorpalette[1+invprop(propCC,rangepropCC[1],rangepropCC[2],log_cC)*(palette_l-1)] else 0

}else if(fav_col=='green') {

g_colpaltt<- colorpalette[1+invprop(propCA,rangepropCA[1],rangepropCA[2],log_cA)*(palette_l-1)]

r_colpaltt<- if (exists('propCB')) colorpalette[1+invprop(propCB,rangepropCB[1],rangepropCB[2],log_cB)*(palette_l-1)] else 0

b_colpaltt<- if (exists('propCC')) colorpalette[1+invprop(propCC,rangepropCC[1],rangepropCC[2],log_cC)*(palette_l-1)] else 0

}else if(fav_col=='blue') {

b_colpaltt<- colorpalette[1+invprop(propCA,rangepropCA[1],rangepropCA[2],log_cA)*(palette_l-1)]

g_colpaltt<- if (exists('propCB')) colorpalette[1+invprop(propCB,rangepropCB[1],rangepropCB[2],log_cB)*(palette_l-1)] else 0

r_colpaltt<- if (exists('propCC')) colorpalette[1+invprop(propCC,rangepropCC[1],rangepropCC[2],log_cC)*(palette_l-1)] else 0

}

colpaltt<- rgb(r_colpaltt, g_colpaltt, b_colpaltt, alpha_default)

# print(colpaltt)

if (!length(colpaltt[!is.na(colpaltt)])) colpaltt[is.na(colpaltt)]<-'black'

# the size palette is obtained by (log)proportional representation of the propC range in the min_size-max_size range.

# minimum and maximum for printing symbols 0.5 to 1, or given by the user.

sizepaltt<-(prop(invprop(propS,rangepropS[1],rangepropS[2],log_s),min_size, max_size, log_s))

if (!length(sizepaltt[!is.na(sizepaltt)])) sizepaltt[is.na(sizepaltt)]<-1

# print("defined palettes"); print(colpaltt); print(sizepaltt)

if (to_jitter) {

jittered_X <- jitter_logalong(propX);

jittered_Y <- jitter_logalong(propY);

}else{

jittered_X <- (propX);

jittered_Y <- (propY);

}

# print("jittered X and Y")

# layout(matrix(c(1,1,1,2,1,1,1,3), 2, 4, byrow = TRUE)) # this works but let's try something fancier

if(!exists('propCB')) {layout(matrix(c(2,2,1,3), 2, 2, byrow = TRUE), widths=c(3,1), heights=c(1,3))}

else if(!exists('propCC')) {layout(matrix(c(1,2,1,3), 2, 2, byrow = TRUE), widths=c(3,1), heights=c(2,2))}

else {layout(matrix(c(1,2,1,3,1,4,1,5), 4, 2, byrow = TRUE), widths=c(4,1), heights=c(1,1,1,1))}

# print(sprintf("X from %G to %G, Y from %G to %G",rangepropX[1],rangepropX[2],rangepropY[1],rangepropY[2]))

plot(NULL, xlim=rangepropX, ylim=rangepropY, log=log_xy, bty='n', xaxt='n', yaxt='n', main=titles$main, xlab=prop_names[1], ylab=prop_names[2]); # , xaxs="i" to plot exactly within xlim,

# grid(nx = NULL, ny = NULL, col = "darkgray", lty = "dotted", lwd = 1, equilogs = T)

my_grid(nx = NULL, ny = NULL, xlims=rangepropX, ylims=rangepropY, col = "darkgray", lty = "dotted", lwd = 1, equilogs = T)

axXticks<-sort(unique(propX));

if (length(axXticks) > 3) {

if(log_it(axXticks)) {

axXticks<-within(log_pretty(axXticks, 1),axXticks,F, T)

}else{

axXticks<-within(pretty(axXticks),axXticks,F, T)

}

}

axis(1, at=axXticks, labels=sprintf("%5.3g",axXticks))

axYticks<-sort(unique(propY));

if (length(axYticks) > 3) {

if(log_it(axYticks)) {

axYticks<-within(log_pretty(axYticks, 1),axYticks,F, T)

}else{

axYticks<-within(pretty(axYticks),axYticks,F, T)

}

}

axis(2, at=axYticks, labels=sprintf("%5.3g",axYticks))

# Not yet working - attempt to reorder points so that the small ones are plotted above the little ones

# for (n in 1:length(propS)) {indx<-c(indx,which(propS==sort(propS)[n]))}

# indx<-rev(indx)

# jittered_X<-jittered_X[indx]

# jittered_Y<-jittered_Y[indx]

# propS<-propS[indx]

# propC<-propC[indx]

points(jittered_X, jittered_Y, , pch=21, cex=sizepaltt, col="white", bg=colpaltt);

if (length(propX) < 10) {text(x=jittered_X, y=jittered_Y, labels=dots_names, pos=4)}

# now it's the time to print a nice colorspace as legend for the bubbles

# the colorbar is a second plot, going at the top right (see layout(matrix()) command, much above)

# I'll have two different colorspaces depending on whether two or three dimensions are being used as colors

if(!exists('propCB')) {

plot(NULL, xlim=range(propCA), ylim=c(0,1), log=if(log_cA) 'x' else '', xaxt='n', yaxt='n', xlab=prop_names[4], ylab=prop_names[5], bty='n', main=titles$color);

for (n in 1:palette_l) {

if (fav_col == 'red') rgbcol<-rgb(colorpalette[n], 0, 0, alpha_default)

else if (fav_col == 'green') rgbcol<-rgb(0, colorpalette[n], 0, alpha_default)

else if (fav_col == 'blue') rgbcol<-rgb(0, 0, colorpalette[n], alpha_default)

rect(xleft= prop(x=(n-1)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)

,xright= prop(x=(n)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)

,ytop= 1

,ybottom= 0

, col=rgbcol

, border="transparent"

)

}

ycolorbarlegend<- 0.5

colorticks<- if (log_cA) within(log_pretty(rangepropCA, 1),rangepropCA, F, F) else within(pretty(rangepropCA),rangepropCA, F)

axis(1, at=colorticks, labels=sprintf("%5.3g",colorticks))

}else if(!exists('propCC')) {

plot(NULL, xlim=range(propCA), ylim=range(propCB), log=paste(if(log_cA) 'x' else '',if(log_cB) 'y' else '', sep=''), xaxt='n', yaxt='n', xlab=prop_names[4], ylab=prop_names[5], bty='n', main=titles$color);

for (m in 1:palette_l) {

for (n in 1:palette_l) {

rgbcol<-rgb(colorpalette[n],colorpalette[m],0, alpha_default);

rect(xleft= prop(x=(n-1)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)

,xright= prop(x=(n)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)

,ytop= prop(x=(m-1)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)

,ybottom= prop(x=(m)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)

,col=rgbcol

,border="transparent"

)

}

}

colorAticks<- if (log_cA) within(log_pretty(rangepropCA, 1),rangepropCA, F, T) else within(pretty(rangepropCA),rangepropCA, F, T)

axis(1, at=colorAticks, labels=sprintf("%5.3g",colorAticks))

colorBticks<- if (log_cB) within(log_pretty(rangepropCB, 1),rangepropCB, F, T) else within(pretty(rangepropCB),rangepropCB, F, T)

axis(2, at=colorBticks, labels=sprintf("%5.3g",colorBticks))

}else{

for (o in c(1,palette_l/2, palette_l)) {

plot(NULL, xlim=range(propCA), ylim=range(propCB), log=paste(if(log_cA) 'x' else '',if(log_cB) 'y' else '', sep=''), xaxt='n', yaxt='n', xlab=prop_names[4], ylab=prop_names[5], bty='n', main=sprintf("ColC=%g percent",100*o/palette_l));

for (m in 1:palette_l) {

for (n in 1:palette_l) {

rgbcol<-rgb(colorpalette[n],colorpalette[m],colorpalette[o], alpha_default)

rect(xleft= prop(x=(n-1)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)

,xright= prop(x=(n)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)

,ytop= prop(x=(m-1)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)

,ybottom= prop(x=(m)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)

,col=rgbcol

,border="transparent"

)

}

}

# flush.console(); Sys.sleep(0.5)

}

colorAticks<- if (log_cA) within(log_pretty(rangepropCA, 1),rangepropCA, F, T) else within(pretty(rangepropCA),rangepropCA, F, T)

axis(1, at=colorAticks, labels=sprintf("%5.3g",colorAticks))

colorBticks<- if (log_cB) within(log_pretty(rangepropCB, 1),rangepropCB, F, T) else within(pretty(rangepropCB),rangepropCB, F, T)

axis(2, at=colorBticks, labels=sprintf("%5.3g",colorBticks))

}

# the bubble legend is a third plot, going in the lower right.

plot(NULL, xlim=widen(c(-1,1), 0.9), ylim=widen(rangepropS, 0.9), log=if(log_s) 'y' else '', xaxt='n', yaxt='n', xlab='', ylab='', bty='n', main=titles$size);

circles <- if (log_s) within(log_pretty(rangepropS, 1), rangepropS, strict=F, wantrng=F) else within(pretty(rangepropS), rangepropS, strict=F, wantrng=F) #all of them

# but I only select five at the most:

circles<-rev(circles[ceiling((1:5)*length(circles)/5)]) # select 5 circles for the legend.

# I may want to get rid of one of the the last two, in case their values are too close

# if (log_s) {if(circles[length(circles)]/circles[length(circles)-1] < 25) {circles<-circles[1:(length(circles)-1)]}}

# else {if((circles[length(circles)]-circles[length(circles)-1]) < 0.7(circles[length(circles)-1]-circles[length(circles)-2])) {circles<-circles[1:(length(circles)-1)]}}

# xlegend<-2

points(rep(-1,length(circles)), circles, pch=21, bg='grey', col='white', cex=prop(invprop(circles, min(circles), max(circles)),min(sizepaltt),max(sizepaltt)))

text(rep(0,length(circles)),circles, labels=sprintf("%5.3g",circles))

}

view raw gistfile1.r hosted with ❤ by GitHub

It's messy and not at all clean - but it gets the job done. This routine is also dependent from several others which define colorscale and other accessory functions... feel free to drop me a line in the comments if you want the lot... Similar plots can be obtained with ggplot2 in much fewer lines, although right now I'm less expert at it so they're much less customised.

First Post: Welcome to this new blog!!!

It's been almost one years that I've started using R as my main programming/analysis tool.

I like the fact that so many beautiful graphics can be produced directly within R.

Although I often just use the basic functionalities, often my work pushes me to develop more complex visualisations which I'd like to share with others so that my efforts aren't wasted after I'm done using them.

Here I'll do my best to share, in the hope that they may be useful to someone, and that more expert users may point out ameliorations to the code, as well.

Later on I'll add this blog to the R-Bloggers feed so that I can contribute back to where I picked up so much inspiration.

Update - 15/05/2012 - I just added the feed to R-Bloggers, to celebrate I\'ll do my best to put out a nice pie chart. My take on the consultant charts!

	# bubble plot - takes in a dataframe with five different properties, for X, Y, Size, ColorA, B and C, plus additional parameters to control appearances of the plots produced
	bubbles_in_2colors<-function( dataset, to_jitter=F, prop_names=NULL, titles=list(main='X vs Y', color='colors', size='size'), min_size = 1, max_size = 5, max_sat = 200, fav_col='green', alpha_default=0.7, ...) {
	# NB: this code can handle a third primary color, but human eyes can't so for the moment I've forbidden that option.
	if(exists('dataset')) {
	if (is.null(prop_names)) {print(prop_names<-names(dataset))};
	if (length(prop_names) ==4) {propX<-dataset[,1]; propY<-dataset[,2]; propS<-dataset[,3]; propCA<-dataset[,4]}
	else if (length(prop_names) ==5) {propX<-dataset[,1]; propY<-dataset[,2]; propS<-dataset[,3]; propCA<-dataset[,4]; propCB<-dataset[,5]}
	else if (length(prop_names) ==6) {propX<-dataset[,1]; propY<-dataset[,2]; propS<-dataset[,3]; propCA<-dataset[,4]; propCB<-dataset[,5]; propCC<-dataset[,6]; print("Three colors are too many for measly human eyes")}
	else {stop(sprintf("I don't know what to do with an array with %d properties",length(prop_names)))}
	}else {stop('did you actually give me a dataset to work with?')}
	rangepropX<-range(propX); log_x=log_it(rangepropX); if (!exists('xlab')) {xlab=prop_names[1]}
	rangepropY<-range(propY); log_y=log_it(rangepropY); if (!exists('ylab')) {ylab=prop_names[2]}
	rangepropS<-range(propS); log_s=log_it(rangepropS);
	# the line below restricts the sizes in case the span of values isn't too spread out.
	Max2min<- rangepropS[2]/rangepropS[1]; if ((!log_s) & (Max2min<5)) {min_size=2; max_size=min_size*Max2min}
	log_xy=paste(if(log_x){'x'}else{''},if(log_y){'y'}else{''},sep="");
	rangepropCA<-range(propCA); log_cA=log_it(rangepropCA);
	if (exists('propCB')) {rangepropCB<-range(propCB); log_cB=log_it(rangepropCB);} else{rangepropCB<-c(0,0); log_cB=F}
	if (exists('propCC')) {rangepropCC<-range(propCC); log_cC=log_it(rangepropCC);} else{rangepropCC<-c(0,0); log_cC=F}
	pch <- if (!exists('pch')) {21} else {pch} # 21 for circles, 22 for squares, 23 for diamonds, 24 for triangles up, 25 for triangles down
	# the color palette is obtained by spreading the known palette (log)proportionally across the range of propC
	# how about setting the color directly in rgb space?
	if (!exists('palette_l')) {palette_l<-50}
	colorpalette<-0:palette_l*(max_sat/255)/palette_l
	print(colorpalette)
	if(fav_col=='red') {
	r_colpaltt<- colorpalette[1+invprop(propCA,rangepropCA[1],rangepropCA[2],log_cA)*(palette_l-1)]
	g_colpaltt<- if (exists('propCB')) colorpalette[1+invprop(propCB,rangepropCB[1],rangepropCB[2],log_cB)*(palette_l-1)] else 0
	b_colpaltt<- if (exists('propCC')) colorpalette[1+invprop(propCC,rangepropCC[1],rangepropCC[2],log_cC)*(palette_l-1)] else 0
	}else if(fav_col=='green') {
	g_colpaltt<- colorpalette[1+invprop(propCA,rangepropCA[1],rangepropCA[2],log_cA)*(palette_l-1)]
	r_colpaltt<- if (exists('propCB')) colorpalette[1+invprop(propCB,rangepropCB[1],rangepropCB[2],log_cB)*(palette_l-1)] else 0
	b_colpaltt<- if (exists('propCC')) colorpalette[1+invprop(propCC,rangepropCC[1],rangepropCC[2],log_cC)*(palette_l-1)] else 0
	}else if(fav_col=='blue') {
	b_colpaltt<- colorpalette[1+invprop(propCA,rangepropCA[1],rangepropCA[2],log_cA)*(palette_l-1)]
	g_colpaltt<- if (exists('propCB')) colorpalette[1+invprop(propCB,rangepropCB[1],rangepropCB[2],log_cB)*(palette_l-1)] else 0
	r_colpaltt<- if (exists('propCC')) colorpalette[1+invprop(propCC,rangepropCC[1],rangepropCC[2],log_cC)*(palette_l-1)] else 0
	}
	colpaltt<- rgb(r_colpaltt, g_colpaltt, b_colpaltt, alpha_default)
	# print(colpaltt)

	if (!length(colpaltt[!is.na(colpaltt)])) colpaltt[is.na(colpaltt)]<-'black'
	# the size palette is obtained by (log)proportional representation of the propC range in the min_size-max_size range.
	# minimum and maximum for printing symbols 0.5 to 1, or given by the user.
	sizepaltt<-(prop(invprop(propS,rangepropS[1],rangepropS[2],log_s),min_size, max_size, log_s))
	if (!length(sizepaltt[!is.na(sizepaltt)])) sizepaltt[is.na(sizepaltt)]<-1
	# print("defined palettes"); print(colpaltt); print(sizepaltt)
	if (to_jitter) {
	jittered_X <- jitter_logalong(propX);
	jittered_Y <- jitter_logalong(propY);
	}else{
	jittered_X <- (propX);
	jittered_Y <- (propY);
	}
	# print("jittered X and Y")
	# layout(matrix(c(1,1,1,2,1,1,1,3), 2, 4, byrow = TRUE)) # this works but let's try something fancier
	if(!exists('propCB')) {layout(matrix(c(2,2,1,3), 2, 2, byrow = TRUE), widths=c(3,1), heights=c(1,3))}
	else if(!exists('propCC')) {layout(matrix(c(1,2,1,3), 2, 2, byrow = TRUE), widths=c(3,1), heights=c(2,2))}
	else {layout(matrix(c(1,2,1,3,1,4,1,5), 4, 2, byrow = TRUE), widths=c(4,1), heights=c(1,1,1,1))}
	# print(sprintf("X from %G to %G, Y from %G to %G",rangepropX[1],rangepropX[2],rangepropY[1],rangepropY[2]))
	plot(NULL, xlim=rangepropX, ylim=rangepropY, log=log_xy, bty='n', xaxt='n', yaxt='n', main=titles$main, xlab=prop_names[1], ylab=prop_names[2]); # , xaxs="i" to plot exactly within xlim,
	# grid(nx = NULL, ny = NULL, col = "darkgray", lty = "dotted", lwd = 1, equilogs = T)
	my_grid(nx = NULL, ny = NULL, xlims=rangepropX, ylims=rangepropY, col = "darkgray", lty = "dotted", lwd = 1, equilogs = T)
	axXticks<-sort(unique(propX));
	if (length(axXticks) > 3) {
	if(log_it(axXticks)) {
	axXticks<-within(log_pretty(axXticks, 1),axXticks,F, T)
	}else{
	axXticks<-within(pretty(axXticks),axXticks,F, T)
	}
	}
	axis(1, at=axXticks, labels=sprintf("%5.3g",axXticks))

	axYticks<-sort(unique(propY));
	if (length(axYticks) > 3) {
	if(log_it(axYticks)) {
	axYticks<-within(log_pretty(axYticks, 1),axYticks,F, T)
	}else{
	axYticks<-within(pretty(axYticks),axYticks,F, T)
	}
	}
	axis(2, at=axYticks, labels=sprintf("%5.3g",axYticks))

	# Not yet working - attempt to reorder points so that the small ones are plotted above the little ones
	# for (n in 1:length(propS)) {indx<-c(indx,which(propS==sort(propS)[n]))}
	# indx<-rev(indx)
	# jittered_X<-jittered_X[indx]
	# jittered_Y<-jittered_Y[indx]
	# propS<-propS[indx]
	# propC<-propC[indx]

	points(jittered_X, jittered_Y, , pch=21, cex=sizepaltt, col="white", bg=colpaltt);
	if (length(propX) < 10) {text(x=jittered_X, y=jittered_Y, labels=dots_names, pos=4)}

	# now it's the time to print a nice colorspace as legend for the bubbles
	# the colorbar is a second plot, going at the top right (see layout(matrix()) command, much above)
	# I'll have two different colorspaces depending on whether two or three dimensions are being used as colors
	if(!exists('propCB')) {
	plot(NULL, xlim=range(propCA), ylim=c(0,1), log=if(log_cA) 'x' else '', xaxt='n', yaxt='n', xlab=prop_names[4], ylab=prop_names[5], bty='n', main=titles$color);
	for (n in 1:palette_l) {
	if (fav_col == 'red') rgbcol<-rgb(colorpalette[n], 0, 0, alpha_default)
	else if (fav_col == 'green') rgbcol<-rgb(0, colorpalette[n], 0, alpha_default)
	else if (fav_col == 'blue') rgbcol<-rgb(0, 0, colorpalette[n], alpha_default)
	rect(xleft= prop(x=(n-1)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)
	,xright= prop(x=(n)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)
	,ytop= 1
	,ybottom= 0
	, col=rgbcol
	, border="transparent"
	)
	}
	ycolorbarlegend<- 0.5
	colorticks<- if (log_cA) within(log_pretty(rangepropCA, 1),rangepropCA, F, F) else within(pretty(rangepropCA),rangepropCA, F)
	axis(1, at=colorticks, labels=sprintf("%5.3g",colorticks))
	}else if(!exists('propCC')) {
	plot(NULL, xlim=range(propCA), ylim=range(propCB), log=paste(if(log_cA) 'x' else '',if(log_cB) 'y' else '', sep=''), xaxt='n', yaxt='n', xlab=prop_names[4], ylab=prop_names[5], bty='n', main=titles$color);
	for (m in 1:palette_l) {
	for (n in 1:palette_l) {
	rgbcol<-rgb(colorpalette[n],colorpalette[m],0, alpha_default);
	rect(xleft= prop(x=(n-1)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)
	,xright= prop(x=(n)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)
	,ytop= prop(x=(m-1)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)
	,ybottom= prop(x=(m)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)
	,col=rgbcol
	,border="transparent"
	)
	}
	}
	colorAticks<- if (log_cA) within(log_pretty(rangepropCA, 1),rangepropCA, F, T) else within(pretty(rangepropCA),rangepropCA, F, T)
	axis(1, at=colorAticks, labels=sprintf("%5.3g",colorAticks))
	colorBticks<- if (log_cB) within(log_pretty(rangepropCB, 1),rangepropCB, F, T) else within(pretty(rangepropCB),rangepropCB, F, T)
	axis(2, at=colorBticks, labels=sprintf("%5.3g",colorBticks))
	}else{
	for (o in c(1,palette_l/2, palette_l)) {
	plot(NULL, xlim=range(propCA), ylim=range(propCB), log=paste(if(log_cA) 'x' else '',if(log_cB) 'y' else '', sep=''), xaxt='n', yaxt='n', xlab=prop_names[4], ylab=prop_names[5], bty='n', main=sprintf("ColC=%g percent",100*o/palette_l));
	for (m in 1:palette_l) {
	for (n in 1:palette_l) {
	rgbcol<-rgb(colorpalette[n],colorpalette[m],colorpalette[o], alpha_default)
	rect(xleft= prop(x=(n-1)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)
	,xright= prop(x=(n)/(palette_l),rangepropCA[1],rangepropCA[2],log_cA)
	,ytop= prop(x=(m-1)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)
	,ybottom= prop(x=(m)/(palette_l),rangepropCB[1],rangepropCB[2],log_cB)
	,col=rgbcol
	,border="transparent"
	)
	}
	}
	# flush.console(); Sys.sleep(0.5)
	}
	colorAticks<- if (log_cA) within(log_pretty(rangepropCA, 1),rangepropCA, F, T) else within(pretty(rangepropCA),rangepropCA, F, T)
	axis(1, at=colorAticks, labels=sprintf("%5.3g",colorAticks))
	colorBticks<- if (log_cB) within(log_pretty(rangepropCB, 1),rangepropCB, F, T) else within(pretty(rangepropCB),rangepropCB, F, T)
	axis(2, at=colorBticks, labels=sprintf("%5.3g",colorBticks))
	}

	# the bubble legend is a third plot, going in the lower right.
	plot(NULL, xlim=widen(c(-1,1), 0.9), ylim=widen(rangepropS, 0.9), log=if(log_s) 'y' else '', xaxt='n', yaxt='n', xlab='', ylab='', bty='n', main=titles$size);
	circles <- if (log_s) within(log_pretty(rangepropS, 1), rangepropS, strict=F, wantrng=F) else within(pretty(rangepropS), rangepropS, strict=F, wantrng=F) #all of them
	# but I only select five at the most:
	circles<-rev(circles[ceiling((1:5)*length(circles)/5)]) # select 5 circles for the legend.
	# I may want to get rid of one of the the last two, in case their values are too close
	# if (log_s) {if(circles[length(circles)]/circles[length(circles)-1] < 25) {circles<-circles[1:(length(circles)-1)]}}
	# else {if((circles[length(circles)]-circles[length(circles)-1]) < 0.7(circles[length(circles)-1]-circles[length(circles)-2])) {circles<-circles[1:(length(circles)-1)]}}
	# xlegend<-2
	points(rep(-1,length(circles)), circles, pch=21, bg='grey', col='white', cex=prop(invprop(circles, min(circles), max(circles)),min(sizepaltt),max(sizepaltt)))
	text(rep(0,length(circles)),circles, labels=sprintf("%5.3g",circles))
	}