## Loading required package: knitr


## The basic files and libraries needed for most presentations
# creates the libraries and common-functions sections


require(ggplot2) #for plots
require(lattice) # nicer scatter plots
require(plyr) # for processing data.frames
require(grid) # contains the arrow function

require(doMC) # for parallel code
require(png) # for reading png images
require(gridExtra)
require(reshape2) # for the melt function
#if (!require("biOps")) {
#  # for basic image processing
#  devtools::install_github("cran/biOps")
#  library("biOps")
#}
## To install EBImage
if (!require("EBImage")) { # for more image processing
source("http://bioconductor.org/biocLite.R")
biocLite("EBImage")
}

used.libraries<-c("ggplot2","lattice","plyr","reshape2","grid","gridExtra","biOps","png","EBImage")


# start parallel environment
registerDoMC()
# functions for converting images back and forth
im.to.df<-function(in.img,out.col="val") {
out.im<-expand.grid(x=1:nrow(in.img),y=1:ncol(in.img))
out.im[,out.col]<-as.vector(in.img)
out.im
}
df.to.im<-function(in.df,val.col="val",inv=F) {
in.vals<-in.df[[val.col]]
if(class(in.vals[1])=="logical") in.vals<-as.integer(in.vals*255)
if(inv) in.vals<-255-in.vals
out.mat<-matrix(in.vals,nrow=length(unique(in.df$x)),byrow=F) attr(out.mat,"type")<-"grey" out.mat } ddply.cutcols<-function(...,cols=1) { # run standard ddply command cur.table<-ddply(...) cutlabel.fixer<-function(oVal) { sapply(oVal,function(x) { cnv<-as.character(x) mean(as.numeric(strsplit(substr(cnv,2,nchar(cnv)-1),",")[[1]])) }) } cutname.fixer<-function(c.str) { s.str<-strsplit(c.str,"(",fixed=T)[[1]] t.str<-strsplit(paste(s.str[c(2:length(s.str))],collapse="("),",")[[1]] paste(t.str[c(1:length(t.str)-1)],collapse=",") } for(i in c(1:cols)) { cur.table[,i]<-cutlabel.fixer(cur.table[,i]) names(cur.table)[i]<-cutname.fixer(names(cur.table)[i]) } cur.table } show.pngs.as.grid<-function(file.list,title.fun,zoom=1) { preparePng<-function(x) rasterGrob(readPNG(x,native=T,info=T),width=unit(zoom,"npc"),interp=F) labelPng<-function(x,title="junk") (qplot(1:300, 1:300, geom="blank",xlab=NULL,ylab=NULL,asp=1)+ annotation_custom(preparePng(x))+ labs(title=title)+theme_bw(24)+ theme(axis.text.x = element_blank(), axis.text.y = element_blank())) imgList<-llply(file.list,function(x) labelPng(x,title.fun(x)) ) do.call(grid.arrange,imgList) } ## Standard image processing tools which I use for visualizing the examples in the script commean.fun<-function(in.df) { ddply(in.df,.(val), function(c.cell) { weight.sum<-sum(c.cell$weight)
data.frame(xv=mean(c.cell$x), yv=mean(c.cell$y),
xm=with(c.cell,sum(x*weight)/weight.sum),
ym=with(c.cell,sum(y*weight)/weight.sum)
)
})
}

colMeans.df<-function(x,...) as.data.frame(t(colMeans(x,...)))

pca.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.cov<-cov(c.cell[,c("x","y")])
c.cell.eigen<-eigen(c.cell.cov)

c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,
data.frame(vx=c.cell.eigen$vectors[1,], vy=c.cell.eigen$vectors[2,],
vw=sqrt(c.cell.eigen$values), th.off=atan2(c.cell.eigen$vectors[2,],c.cell.eigen$vectors[1,])) ) }) } vec.to.ellipse<-function(pca.df) { ddply(pca.df,.(val),function(cur.pca) { # assume there are two vectors now create.ellipse.points(x.off=cur.pca[1,"x"],y.off=cur.pca[1,"y"], b=sqrt(5)*cur.pca[1,"vw"],a=sqrt(5)*cur.pca[2,"vw"], th.off=pi/2-atan2(cur.pca[1,"vy"],cur.pca[1,"vx"]), x.cent=cur.pca[1,"x"],y.cent=cur.pca[1,"y"]) }) } # test function for ellipse generation # ggplot(ldply(seq(-pi,pi,length.out=100),function(th) create.ellipse.points(a=1,b=2,th.off=th,th.val=th)),aes(x=x,y=y))+geom_path()+facet_wrap(~th.val)+coord_equal() create.ellipse.points<-function(x.off=0,y.off=0,a=1,b=NULL,th.off=0,th.max=2*pi,pts=36,...) { if (is.null(b)) b<-a th<-seq(0,th.max,length.out=pts) data.frame(x=a*cos(th.off)*cos(th)+b*sin(th.off)*sin(th)+x.off, y=-1*a*sin(th.off)*cos(th)+b*cos(th.off)*sin(th)+y.off, id=as.factor(paste(x.off,y.off,a,b,th.off,pts,sep=":")),...) } deform.ellipse.draw<-function(c.box) { create.ellipse.points(x.off=c.box$x[1],
y.off=c.box$y[1], a=c.box$a[1],
b=c.box$b[1], th.off=c.box$th[1],
col=c.box$col[1]) } bbox.fun<-function(in.df) { ddply(in.df,.(val), function(c.cell) { c.cell.mean<-colMeans.df(c.cell[,c("x","y")]) xmn<-emin(c.cell$x)
xmx<-emax(c.cell$x) ymn<-emin(c.cell$y)
ymx<-emax(c.cell$y) out.df<-cbind(c.cell.mean, data.frame(xi=c(xmn,xmn,xmx,xmx,xmn), yi=c(ymn,ymx,ymx,ymn,ymn), xw=xmx-xmn, yw=ymx-ymn )) }) } # since the edge of the pixel is 0.5 away from the middle of the pixel emin<-function(...) min(...)-0.5 emax<-function(...) max(...)+0.5 extents.fun<-function(in.df) { ddply(in.df,.(val), function(c.cell) { c.cell.mean<-colMeans.df(c.cell[,c("x","y")]) out.df<-cbind(c.cell.mean,data.frame(xmin=c(c.cell.mean$x,emin(c.cell$x)), xmax=c(c.cell.mean$x,emax(c.cell$x)), ymin=c(emin(c.cell$y),c.cell.mean$y), ymax=c(emax(c.cell$y),c.cell.mean$y))) }) } common.image.path<-"../common" qbi.file<-function(file.name) file.path(common.image.path,"figures",file.name) qbi.data<-function(file.name) file.path(common.image.path,"data",file.name) th_fillmap.fn<-function(max.val) scale_fill_gradientn(colours=rainbow(10),limits=c(0,max.val))  # Quantitative Big Imaging author: Kevin Mader date: 30 April 2015 width: 1440 height: 900 css: ../common/template.css transition: rotate ETHZ: 227-0966-00L # Dynamic Experiments # Course Outline source('../common/schedule.R')  • 19th February - Introduction and Workflows • 26th February - Image Enhancement (A. Kaestner) • 5th March - Basic Segmentation, Discrete Binary Structures • 12th March - Advanced Segmentation • 19th March - Applying Graphical Models and Machine Learning (A. Lucchi) • 26th March - Analyzing Single Objects • 2nd April - Analyzing Complex Objects • 16th April - Groups and Spatial Distribution • 23rd April - Statistics and Reproducibility • 30th April - Dynamic Experiments • 7th May - Scaling Up / Big Data • 21th May - Guest Lecture, Applications in High-content Screening and Wood • 28th May - Project Presentations # Literature / Useful References ### Books • Jean Claude, Morphometry with R • John C. Russ, “The Image Processing Handbook”,(Boca Raton, CRC Press) • Available online within domain ethz.ch (or proxy.ethz.ch / public VPN) ### Papers / Sites • Comparsion of Tracking Methods in Biology • Chenouard, N., Smal, I., de Chaumont, F., Maška, M., Sbalzarini, I. F., Gong, Y., … Meijering, E. (2014). Objective comparison of particle tracking methods. Nature Methods, 11(3), 281–289. doi:10.1038/nmeth.2808 • Maska, M., Ulman, V., Svoboda, D., Matula, P., Matula, P., Ederra, C., … Ortiz-de-Solorzano, C. (2014). A benchmark for comparison of cell tracking algorithms. Bioinformatics (Oxford, England), btu080–. doi:10.1093/bioinformatics/btu080 • Multiple Hypothesis Testing • Coraluppi, S. & Carthel, C. Multi-stage multiple-hypothesis tracking. J. Adv. Inf. Fusion 6, 57–67 (2011). • Chenouard, N., Bloch, I. & Olivo-Marin, J.-C. Multiple hypothesis tracking in microscopy images. in Proc. IEEE Int. Symp. Biomed. Imaging 1346–1349 (IEEE, 2009). # Previously on QBI ... • Image Enhancment • Highlighting the contrast of interest in images • Minimizing Noise • Understanding image histograms • Automatic Methods • Component Labeling • Single Shape Analysis • Complicated Shapes • Distribution Analysis # Quantitative "Big" Imaging The course has covered imaging enough and there have been a few quantitative metrics, but "big" has not really entered. What does big mean? • Not just / even large • it means being ready for big data • volume, velocity, variety (3 V's) • scalable, fast, easy to customize So what is "big" imaging • doing analyses in a disciplined manner • fixed steps • easy to regenerate results • no magic • having everything automated • 100 samples is as easy as 1 sample • being able to adapt and reuse analyses • one really well working script and modify parameters • different types of cells • different regions # Objectives 1. What sort of dynamic experiments do we have? 2. How can we design good dynamic experiments? 3. How can we track objects between points? • How can we track shape? • How can we track distribution? 4. How can we track topology? 5. How can we track voxels? 6. How can we assess deformation and strain? 7. How can assess more general cases? # Outline • Motivation (Why and How?) • Scientific Goals • Experiments • Simulations • Experiment Design • Object Tracking • Distribution • Topology • Voxel-based Methods • Cross Correlation • DIC • DIC + Physics • General Problems • Thickness - Lung Tissue • Curvature - Metal Systems • Two Point Correlation - Volcanic Rock # Motivation • 3D images are already difficult to interpret on their own • 3D movies (4D) are almost impossible • 2D movies (3D) can also be challenging We can say that it looks like, but many pieces of quantitative information are difficult to extract • how fast is it going? • how many particles are present? • are their sizes constant? • are some moving faster? • are they rearranging? # Scientific Goals ### Rheology Understanding the flow of liquids and mixtures is important for many processes • blood movement in arteries, veins, and capillaries • oil movement through porous rock • air through dough when cooking bread • magma and gas in a volcano ### Deformation Deformation is similarly important since it plays a significant role in the following scenarios • red blood cell lysis in artificial heart valves • microfractures growing into stress fractures in bone • toughening in certain wood types # Experiments The first step of any of these analyses is proper experimental design. Since there is always • a limited field of view • a voxel size • a maximum rate of measurements • a non-zero cost for each measurement There are always trade-offs to be made between getting the best possible high-resolution nanoscale dynamics and capturing the system level behavior. • If we measure too fast • sample damage • miss out on long term changes • have noisy data • Too slow • miss small, rapid changes • blurring and other motion artifacts • Too high resolution • not enough unique structures in field of view to track • Too low resolution • not sensitive to small changes # Simulation In many cases, experimental data is inherited and little can be done about the design, but when there is still the opportunity, simulations provide a powerful tool for tuning and balancing a large number parameters Simulations also provide the ability to pair post-processing to the experiments and determine the limits of tracking. # What do we start with? Going back to our original cell image 1. We have been able to get rid of the noise in the image and find all the cells (lecture 2-4) 2. We have analyzed the shape of the cells using the shape tensor (lecture 5) 3. We even separated cells joined together using Watershed (lecture 6) 4. We have created even more metrics characterizing the distribution (lecture 7) We have at least a few samples (or different regions), large number of metrics and an almost as large number of parameters to tune ### How do we do something meaningful with it? # Basic Simulation We start with a starting image # Fill Image code # ... is for extra columns in the data set fill.img.fn<-function(in.img,step.size=1,...) { xr<-range(in.img$x)
yr<-range(in.img$y) ddply(expand.grid(x=seq(xr[1],xr[2],step.size), y=seq(yr[1],yr[2],step.size)), .(x,y), function(c.pos) { ix<-c.pos$x[1]
iy<-c.pos$y[1] nset<-subset(in.img,x==ix & y==iy) if(nrow(nset)<1) nset<-data.frame(x=ix,y=iy,val=0,...) nset }) } make.spheres<-function(sph.list,base.gr=seq(-1,1,length.out=40)) { start.image<-expand.grid(x=base.gr,y=base.gr) start.image$val<-c(0)
for(i in 1:nrow(sph.list)) {
start.image$val<-with(start.image, val + ( ((x-sph.list[i,"x"])^2+(y-sph.list[i,"y"])^2)< sph.list[i,"r"]^2) ) } start.image$phase<-with(start.image,ifelse(val>0,TRUE,FALSE))
start.image
}
rand.list<-function(n.pts,r=0.15,min=-1,max=1) data.frame(x=runif(n.pts,min=-1,max=1),y=runif(n.pts,min=-1,max=1),r=r)
grid.list<-function(n.pts,r=0.15,min=-1,max=1) cbind(expand.grid(x=seq(min,max,length.out=n.pts),y=seq(min,max,length.out=n.pts)),r=r)


• A number of sphere objects with the same radius scattered evenly across the field of view
test.grid<-grid.list(5)
ggplot(subset(make.spheres(test.grid),phase),aes(x,y,fill=phase))+
geom_raster(fill="gray50",alpha=0.75)+
coord_equal()+
theme_bw(20)


### Analysis

• Threshold
• Component Label
• Shape Analysis
• Distribution Analysis

# Describing Motion

$\vec{v}(\vec{x})=\langle 0,0.1 \rangle$

ggplot(subset(make.spheres(test.grid),phase),aes(x,y))+
geom_raster(fill="gray50",alpha=0.75)+
geom_segment(data=cbind(test.grid,xv=0,yv=0.1),
aes(xend=x+xv,yend=y+yv),arrow=arrow(length = unit(0.3,"cm")))+
coord_equal()+
theme_bw(20)


$\vec{v}(\vec{x})=0.3\frac{\vec{x}}{||\vec{x}||}\times \langle 0,0,1 \rangle$

test.grid$xyr<-with(test.grid,sqrt(x^2+y^2)) ggplot(subset(make.spheres(test.grid),phase),aes(x,y))+ geom_raster(fill="gray50",alpha=0.75)+ geom_segment(data=with(test.grid,cbind(test.grid,xv=-0.3*y/xyr,yv=0.3*x/xyr)), aes(xend=x+xv,yend=y+yv),arrow=arrow(length = unit(0.3,"cm")))+ coord_equal()+ theme_bw(20)  # Many Frames $\vec{v}(\vec{x})=\langle 0,0.1 \rangle$ many.frames<-ldply(seq(0,2,length.out=9),function(in.offset) { cbind( make.spheres(data.frame(x=test.grid$x,y=test.grid$y+in.offset,r=test.grid$r)),
offset=in.offset
)
})

ggplot(subset(many.frames,phase),aes(x,y))+
geom_raster(fill="gray50",alpha=0.75)+
coord_equal()+
facet_wrap(~offset)+
labs(title="Different Frames in Linear Flow Image")+
theme_bw(20)


$\vec{v}(\vec{x})=0.3\frac{\vec{x}}{||\vec{x}||}\times \langle 0,0,1 \rangle$

many.frames<-cbind(make.spheres(test.grid),offset=0)
last.frame<-test.grid
for(in.offset in seq(0,2,length.out=9)) {
last.frame$xyr<-with(last.frame,sqrt(x^2+y^2)) last.frame$xyr[which(last.frame$xyr==0)]<-1 last.frame<-with(last.frame,data.frame(x=x-0.05*y/xyr,y=y+0.05*x/xyr,r=r)) many.frames<-rbind(many.frames, cbind(make.spheres(last.frame),offset=in.offset) ) } ggplot(subset(many.frames,phase),aes(x,y))+ geom_raster(fill="gray50",alpha=0.75)+ coord_equal()+ facet_wrap(~offset)+ labs(title="Different Frames in Spiral Flow")+ theme_bw(20)  # Random Appearance / Disappearance Under perfect imaging and experimental conditions objects should not appear and reappear but due to • noise • limited fields of view / depth of field • discrete segmentation approachs • motion artifacts • blurred objects often have lower intensity values than still objects It is common for objects to appear and vanish regularly in an experiment. many.frames<-ldply(seq(0,1.,length.out=9),function(in.offset) { c.grid<-test.grid[sample(nrow(test.grid), 18), ] cbind( make.spheres(data.frame(x=c.grid$x,y=c.grid$y+in.offset,r=c.grid$r)),
offset=in.offset
)
})
ggplot(subset(many.frames,phase),aes(x,y))+
geom_raster(fill="gray50",alpha=0.75)+
coord_equal()+
facet_wrap(~offset)+
labs(title="Different Frames in Linear Flow Image")+
theme_bw(20)


# Jitter / Motion Noise

Even perfect spherical objects do not move in a straight line. The jitter can be seen as a stochastic variable with a random magnitude ($$a$$) and angle ($$b$$). This is then sampled at every point in the field

$\vec{v}(\vec{x})=\vec{v}_L(\vec{x})+||a||\measuredangle b$

ggplot(subset(make.spheres(test.grid),phase),aes(x,y))+
geom_raster(fill="gray50",alpha=0.75)+
geom_segment(data=cbind(test.grid,
xv=0+runif(nrow(test.grid),min=-0.05,max=0.05),
yv=0.1+runif(nrow(test.grid),min=-0.05,max=0.05)),
aes(xend=x+xv,yend=y+yv),arrow=arrow(length = unit(0.3,"cm")))+
coord_equal()+
theme_bw(20)


# Jitter (Continued)

Over many frames this can change the path significantly

last.frame<-test.grid[,c("x","y","r")]
last.frame$id<-1:nrow(last.frame) many.frames<-cbind(make.spheres(last.frame),offset=0) many.grids<-cbind(last.frame,offset=0) for(in.offset in cumsum(rep(0.2,8))) { last.frame<-with(last.frame, data.frame(x=x+runif(nrow(last.frame),min=-0.1,max=0.1), y=y+0.2+runif(nrow(last.frame),min=-0.1,max=0.1), r=r) ) last.frame$id<-1:nrow(last.frame)

many.frames<-rbind(many.frames,
cbind(make.spheres(last.frame),offset=in.offset)
)

many.grids<-rbind(many.grids,
cbind(last.frame,offset=in.offset)
)
}
ggplot(subset(many.frames,phase),aes(x,y))+
geom_raster(fill="gray50",alpha=0.75)+
coord_equal()+
facet_wrap(~offset)+
labs(title="Different Frames in Linear Flow Image")+
theme_bw(20)


The simulation can be represented in a more clear fashion by using single lines to represent each spheroid

ggplot(many.grids,aes(x,y,))+
geom_path(aes(color=id,group=id))+
coord_equal()+
labs(title="Different Paths in Linear Jittered Flow Image")+
theme_bw(20)


# Limits of Tracking

We see that visually tracking samples can be difficult and there are a number of parameters which affect the ability for us to clearly see the tracking.

• flow rate
• flow type
• density
• appearance and disappearance rate
• jitter
• particle uniqueness

We thus try to quantify the limits of these parameters for different tracking methods in order to design experiments better.

### Acquisition-based Parameters

• acquisition rate $$\rightarrow$$ flow rate, jitter (per frame)
• resolution $$\rightarrow$$ density, appearance rate

### Experimental Parameters

• experimental setup (pressure, etc) $$\rightarrow$$ flow rate/type
• polydispersity $$\rightarrow$$ particle uniqueness
• vibration/temperature $$\rightarrow$$ jitter
• mixture $$\rightarrow$$ density

# Tracking: Nearest Neighbor

While there exist a number of different methods and complicated approaches for tracking, for experimental design it is best to start with the simplist, easiest understood method. The limits of this can be found and components added as needed until it is possible to realize the experiment

• If a dataset can only be analyzed with a multiple-hypothesis testing neural network model then it might not be so reliable

We then return to nearest neighbor which means we track a point ($$\vec{P}_0$$) from an image ($$I_0$$) at $$t_0$$ to a point ($$\vec{P}_1$$) in image ($$I_1$$) at $$t_1$$ by

$\vec{P}_1=\textrm{argmin}(||\vec{P}_0-\vec{y}|| \forall \vec{y}\in I_1)$

# Scoring Tracking

In the following examples we will use simple metrics for scoring fits where the objects are matched and the number of misses is counted.

There are a number of more sensitive scoring metrics which can be used, by finding the best submatch for a given particle since the number of matches and particles does not always correspond. See the papers at the beginning for more information

source('~/Dropbox/tipl_maven/snippets/R/trackingCode.R')
source('../common/data/simFlow.R')


# Basic Simulations

Input flow from simulation

$\vec{v}(\vec{x})=\langle 0,0,0.05 \rangle+||0.01||\measuredangle b$

# Generate a simple synthetic dataset
in.frames<-generate.frames(base.rand=0.01,crop.size=c(-10,10),n.objects=20,n.frames=10,flow.rate=0.05,force.2d=T)



ggplot(do.call(rbind,in.frames),
aes(x=POS_X,y=POS_Z,color=as.factor(REAL_LACUNA_NUMBER)))+
geom_path(aes(linetype="Original"))+
labs(x="X Position",y="Z Position",color="Object ID",title="Flow Simulation Results")+
theme_bw(20)


Nearest Neighbor Tracking

ggplot(do.call(rbind,in.frames),
aes(x=POS_X,y=POS_Z,color=as.factor(REAL_LACUNA_NUMBER)))+
#geom_path(aes(linetype="Original"))+
geom_path(data=subset(all.tracks,Matching=="No Offset"),aes(linetype="Tracked"),size=2,alpha=0.5)+
facet_wrap(~Matching)+
labs(x="X Position",y="Z Position",color="Object ID",title="Tracking Results")+
theme_bw(20)


# More Complicated Flows

Input flow from simulation

$\vec{v}(\vec{x})=\langle 0,0,0.01 \rangle+||0.05||\measuredangle b$

# Generate a simple synthetic dataset
in.frames<-generate.frames(base.rand=0.1,crop.size=c(-10,10),n.objects=20,n.frames=20,flow.rate=0.01,
force.2d=T,rand.fun=function(x,min=0,max=1) rnorm(x,(min+max)/2,(max-min)/2))

ggplot(do.call(rbind,in.frames),