Rev. | 5d26d667526f2011f88783fd522afa470d80303d |
---|---|
Größe | 11,359 Bytes |
Zeit | 2011-02-03 03:18:40 |
Autor | lorenzo |
Log Message | I fixed a bit the code to make it able to better process another dataset. |
rm(list=ls()) #clear the workspace
# Documentation
## This scripts takes an input file of the kind
# time ID node A ID node B i.e. a three-column input, where the ID of
# each node is preferably an integer number. The time index is also an
# integer number (n=0,1,2...)
#One can specify a parameter "step" telling how many physical time units
#(e.g. days, seconds, etc...) each entry in time stands for.
#The input format amounts to listing the active links in the network at
# every time.
#Isolated nodes are described as self-interacting nodes, e.g. isolated
# node 3000 at time 7 becomes
# 7 3000 3000
#Finally, links are not repeated i.e. for a given time step one can find
#the e.g. link
# 6 1300 1000 but not also
# 6 1000 1300
library(igraph)
library(ggplot2)
library(tikzDevice)
#The only crucial library is the igraph library for complex networks.
#The other two libraries are for plotting only.
#NB: on a Debian system with R installed from standard repositories, the
#libraries above can be installed from
# an R session with
# install.packages("library_name")
## Function log_binning_3 is used to bin discrete data on an
## evenly-space grid in logx.
#the discrete nature of the data introduces some nuisances in the
#binning
#procedure.
#The function needs the range of the data [x_min, x_max] and the number
#of bins n_bins.
#epsi is a tolerance parameter which can be set=0. The output is the
#grid defining the bins.
log_binning_3 <- function(x_min,x_max,epsi,n_bin){
x_max <- x_max+epsi
m <- n_bin-1
r <- (x_max/x_min)^(1/m)
my_seq <- seq(0,m,by=1)
grid <- x_min*r^my_seq
}
#Function which accepts a set of data ("data") as a vector and
#calculates the actual histogram in log space
#It needs to call the function log_binning_3.
discrete_binning <- function(data, step, x_min, x_max, epsi,n_bin, scale_min){
new_data <- data/step
x_min <- x_min/step
x_max <- x_max/step
grid <- log_binning_3(x_min,x_max,epsi,n_bin)
my_seq <- seq(x_min,x_max)
h2<-hist(my_seq, plot=F, breaks=grid)
#Measure the discrete width of each bin, i.e. how many
#points can fall in each bin.
norm_count <- h2$counts
h1<-hist(new_data, plot=F, breaks=grid)
sel <- which(h1$counts>0)
x_left <- grid[1:length(grid)-1][sel]*step
x_right <- grid[2:length(grid)][sel]*step
y_max <- h1$counts[sel]/sum(h1$count)/norm_count[sel]
scale_min <- min(scale_min, min(y_max/2))
y_min <- rep(scale_min,length(y_max))
data_out <- cbind(y_max,y_min,x_left,x_right)
data_out <- as.data.frame(data_out)
names(data_out) <- c("y_max","y_min","x_left","x_right")
return(data_out)
}
#This function converts the input file into a network aggregated in the interval
# [t_ini, t_fin). It returns a graph object (it needs the igraph library)
aggregate <- function(data, t_ini, t_end, step){
sel <- which((data[,1]>=t_ini) & (data[, 1]<t_end))
tab <- data[sel,2:3 ]
g <- graph.data.frame(tab, dir=FALSE)
E(g)$weight <- count.multiple(g)*step # I am now assigning a weight to
# each edge according to
# how often it gets repeated
g <- simplify(g)
return(g)
}
epsi <- 0 #tolerance parameter in the logarithmic binning
step <- 100 #we say that every time step is worth 100 in some units
#(days, minutes, secs, etc...)
scale_min <- 1e-4 #a graphical parameter fixing the bottom of the
#logarithmic binning when I cannot naturally introduce a zero
n_bin <- 9 #number of logaritmic bins
lin_bin <- 5 # width of the bins (linear bin structure)
t_ini <- 0 #time at which I start aggregating the results
t_end <- 50 #time when I stop the aggregation
my_seed <- 199 #parameter I can choose in order to have reproducible
#graph layouts
#read the time-dependent edgelist
tab <- read.table("CoSMoGraph.dat", header=FALSE)
# aggregate everything into a network
#this requires the igraph library.
g <- aggregate(tab, t_ini, t_end, step)
#the following bit is only for visualization (plotting the networks)
#chose a layout for the network.
l <- layout.fruchterman.reingold(g)
l <- layout.norm(l, -1,1, -1,1)
#calculate network diameter (it will be highlighted in the network
#visualization)
d <- get.diameter(g,weights=NA)
set.seed(my_seed)
pdf("aggregated-network.pdf")
#main plot
plot(g, layout=l,
vertex.label.dist=0.5,
vertex.color="#ff000033",
vertex.frame.color="#ff000033",
edge.color="#55555533",
vertex.label=NA,
vertex.size=4,
edge.width=log(1+E(g)$weight/step),
edge.label=NA )
#now highlight the network diameter
V(g)$id <- seq(vcount(g))-1
g2 <- subgraph(g, d)
plot(g2, layout=l[ V(g2)$id+1, ],
vertex.color="red",
vertex.frame.color="#ff000033",
edge.color="#555555",
vertex.label=NA ,
vertex.size=4,
add=TRUE,
rescale=FALSE,
edge.width=log(1+E(g2)$weight/step))
#Switch off the plotting device.
dev.off()
set.seed(my_seed)
pdf("aggregated-network-no-diam.pdf")
#main plot
plot(g, layout=l,
vertex.label.dist=0.5,
vertex.color="#ff000033",
vertex.frame.color="#ff000033",
edge.color="#55555533",
vertex.label=NA,
vertex.size=4,
edge.width=log(1+E(g)$weight/step),
edge.label=NA )
#Switch off the plotting device.
dev.off()
#At this point we save the aggregated network using two standard
#formats,namely the gml and graphml formats.
fn <- paste("aggregated-network.gml",sep="")
write.graph(g, fn, format="gml" )
fn <- paste("aggregated-network.xml",sep="")
write.graph(g, fn, format="graphml" )
#Now we calculate the list of edge weights and the list of node
#strengths which are stored into two
#files. The list of nodes degrees is also calculated.
wlist <- E(g)$weight
strength <- graph.strength(g)
k_dist <- degree(g)
k_dist <- as.data.frame(k_dist)
names(k_dist) <- c("k")
#Save the raw quantities into files
write.table(wlist, "weight_list.dat", col.names=FALSE, row.names=FALSE)
write.table(strength, "strength_list.dat", col.names=FALSE, row.names=FALSE)
write.table(k_dist, "degree_list.dat", col.names=FALSE, row.names=FALSE)
#Now bin the two previous sets of data.
hist_data_s <- discrete_binning(strength, step, min(strength),
max(strength),
epsi,n_bin, scale_min)
hist_data_w <- discrete_binning(wlist, step, min(wlist), max(wlist),
epsi,n_bin, scale_min)
#Now I save the binning of the weight and strength distributions to files.
write.table(hist_data_s, "strength_histogram.dat", row.names=FALSE)
write.table(hist_data_w, "weight_histogram.dat", row.names=FALSE)
#In this specific example, the degree distribution is not fat-tailed,
#hence there is no need to resort to a logarithmic binning.
#what follows is the setup of graphical parameters to generate plots of
#the calculated distributions
#it relies on a combination of ggplot2+tikzDevice. The plot object is
#translated into a tex file and then compiled on the fly.
#It allows the correct representation of mathematical formulae,
#superscripts/subscripts exactly like in a tex document.
#Set axis ticks for the log-log plot
mb <- c(seq(10,100, by=10), seq(200,1000, by=100), seq(2000, 1e4, by=1e3),
seq(2e4, 1e5, by=1e4), seq(2e5, 1e6, by=1e5), seq(2e6, 1e7, by=1e6) )
my_sel <- seq(1,length(mb), by=9)
my_label2 <- rep("",length(mb))
my_label2[my_sel] <- expression(10^1 ,10^2, 10^3, 10^4, 10^5, 10^6, 10^7)
mby <- c(seq(1e-8,1e-7,by=1e-8),seq(2e-7,1e-6,by=1e-7),seq(2e-6, 1e-5,by=1e-6),
seq(2e-5,1e-4, by=1e-5),seq(2e-4,1e-3, by=1e-4),
seq(2e-3, 1e-2, by=1e-3),seq(2e-2, 1e-1, by=1e-2),
seq(2e-1,1,by=1e-1) )
my_sel <- seq(1,length(mby), by=9)
my_label2y <- rep("",length(mby))
my_label2y[my_sel] <- expression(10^-8,10^-7,10^-6 ,10^-5, 10^-4, 10^-3,
10^-2, 10^-1,10^0)
#Now use ggplot2 to generate the plot object
gpl2<- ggplot(data=hist_data_w)+
# geom_point(aes(x=flux,y=n_clus))+
geom_rect( aes(ymin = y_min, ymax= y_max, xmin=x_left,xmax=x_right),
colour="black", fill="blue")+
opts( panel.background=theme_rect(fill='white', size=1.5))+
xlab("$w_{ij}$ [arbitrary units]")+
ylab("$P(w_{ij})$")+
scale_x_continuous(trans="log10",limits=c(1e2,1.1e4), breaks=mb,
label=my_label2)+
scale_y_continuous(trans="log10",limits=c(1e-4,1e0), breaks=mby,
label=my_label2y)+
opts(panel.grid.minor = theme_blank())+
opts(panel.grid.major = theme_blank())+
opts(axis.ticks = theme_segment(colour = "black", size=1),
axis.ticks.length = unit(0.15, "cm"))+
opts(axis.title.x = theme_text(size = 20))+
opts(axis.title.y = theme_text(size = 20, angle=90))+
opts(axis.text.x = theme_text(size=15, colour="black",vjust=1))+
opts(axis.text.y = theme_text(size=15, colour="black", hjust=1))
fn <- paste("p_w_.tex")
#use tikzDevice to generate and the compile the latex file ultimately
#leading to the figures.
tikz(fn, standAlone = TRUE, width=5,height=5)
print(gpl2)
dev.off()
tools::texi2dvi(fn,pdf=T)
#Same story as above
gpl2<- ggplot(data=hist_data_s)+
geom_rect( aes(ymin = y_min, ymax= y_max, xmin=x_left,xmax=x_right),
colour="black", fill="blue")+
opts( panel.background=theme_rect(fill='white', size=1.5))+
xlab("$s$ [arbitrary units]")+
ylab("$P(s)$")+
scale_x_continuous(trans="log10",limits=c(100,1.1e5), breaks=mb,
label=my_label2)+
scale_y_continuous(trans="log10",limits=c(6e-5,1e-1), breaks=mby,
label=my_label2y)+
opts(panel.grid.minor = theme_blank())+
opts(panel.grid.major = theme_blank())+
opts(axis.ticks = theme_segment(colour = "black", size=1),
axis.ticks.length = unit(0.15, "cm"))+
opts(axis.title.x = theme_text(size = 20))+
opts(axis.title.y = theme_text(size = 20, angle=90))+
opts(axis.text.x = theme_text(size=15, colour="black",vjust=1))+
opts(axis.text.y = theme_text(size=15, colour="black", hjust=1))
fn <- paste("p_s_.tex")
tikz(fn, standAlone = TRUE, width=5,height=5)
print(gpl2)
dev.off()
tools::texi2dvi(fn,pdf=T)
#Now I use the geom_histogram of ggplot2 for a "simpler" histogram on a
#linear scale.
gpl2 <- ggplot(k_dist,aes(x=k))+
geom_histogram(aes(y=..count../sum(..count..)),binwidth=lin_bin,
colour="black", fill="blue",origin=1)+
opts( panel.background=theme_rect(fill='white', size=1.5))+
opts(panel.grid.minor = theme_blank())+
opts(panel.grid.major = theme_blank())+
scale_x_continuous(limits=c(0,102), breaks=seq(0, 100, by=10))+
scale_y_continuous(limits=c(0,.4), breaks=seq(0,0.4, by=0.1))+
opts(axis.ticks = theme_segment(colour = "black", size=1),
axis.ticks.length = unit(0.15, "cm"))+
xlab("$k$")+
ylab("$P(k)$")+
opts(axis.title.x = theme_text(size = 20))+
opts(axis.title.y = theme_text(size = 20, angle=90,hjust=0.5))+
opts(plot.title = theme_text(size = 25))+
opts(axis.text.x = theme_text(size=18, colour="black",vjust=1))+
opts(axis.text.y = theme_text(size=18, colour="black", hjust=1))
fn <- paste("p_k_.tex")
tikz(fn, standAlone = TRUE, width=5,height=5)
print(gpl2)
dev.off()
tools::texi2dvi(fn,pdf=T)
print("So far so good")