OSDN > Entwickler > larry77 > Kammer > myprojects-hg-reborn > File Details
myprojects-hg-reborn
Fork

(Original-Repository, kein Fork Herkunft)
File Info

Rev.	5d26d667526f2011f88783fd522afa470d80303d
Größe	11,359 Bytes
Zeit	2011-02-03 03:18:40
Autor	lorenzo
Log Message	I fixed a bit the code to make it able to better process another dataset.
Content

Im Rohformat exportieren
rm(list=ls()) #clear the workspace



# Documentation
## This scripts takes an input file of the kind

# time  ID node A  ID node B i.e. a three-column input, where the ID of
# each node is preferably an integer number. The time index is also an
# integer number (n=0,1,2...)
#One can specify a parameter "step" telling how many physical time units
#(e.g. days, seconds, etc...) each entry in time stands for.
#The input format amounts to listing the active links in the network at
# every time.
#Isolated nodes are described as self-interacting nodes, e.g. isolated
# node 3000 at time 7 becomes

# 7 3000 3000

#Finally, links are not repeated i.e. for a given time step one can find
#the e.g. link
# 6 1300 1000 but not also
# 6 1000 1300


library(igraph)
library(ggplot2)
library(tikzDevice)


#The only crucial library is the igraph library for complex networks.
#The other two libraries are for plotting only.
#NB: on a Debian system with R installed from standard repositories, the
#libraries above can be installed from
# an R session with
# install.packages("library_name")


## Function log_binning_3 is used to bin discrete data on an
## evenly-space grid in logx.
#the discrete nature of the data introduces some nuisances in the
#binning
#procedure.
#The function needs the range of the data [x_min, x_max] and the number
#of bins n_bins.
#epsi is a tolerance parameter which can be set=0. The output is the
#grid defining the bins.

log_binning_3 <- function(x_min,x_max,epsi,n_bin){
  x_max <- x_max+epsi
  m <- n_bin-1
  r <- (x_max/x_min)^(1/m)
  my_seq <- seq(0,m,by=1)
  grid <- x_min*r^my_seq

}


#Function which accepts a set of data ("data") as a vector and
#calculates the actual histogram in log space
#It needs to call the function log_binning_3.

discrete_binning <- function(data, step, x_min, x_max, epsi,n_bin, scale_min){

  new_data <- data/step
  x_min <- x_min/step
  x_max <- x_max/step

  grid <- log_binning_3(x_min,x_max,epsi,n_bin)

  my_seq <- seq(x_min,x_max)


  h2<-hist(my_seq, plot=F, breaks=grid)

#Measure the discrete width of each bin, i.e. how many
#points can fall in each bin.

  norm_count <- h2$counts

  h1<-hist(new_data, plot=F, breaks=grid)

  sel <- which(h1$counts>0)

  x_left <- grid[1:length(grid)-1][sel]*step

  x_right <- grid[2:length(grid)][sel]*step

  y_max <- h1$counts[sel]/sum(h1$count)/norm_count[sel]

  scale_min <- min(scale_min, min(y_max/2))

  y_min <- rep(scale_min,length(y_max))

  data_out <- cbind(y_max,y_min,x_left,x_right)

  data_out <- as.data.frame(data_out)
  names(data_out) <- c("y_max","y_min","x_left","x_right")

  return(data_out)
  
}

#This function converts the input file into a network aggregated in the interval
# [t_ini, t_fin). It returns a graph object (it needs the igraph library)

aggregate <- function(data, t_ini, t_end, step){

  sel <- which((data[,1]>=t_ini) & (data[, 1]<t_end))

  tab <- data[sel,2:3 ]

  g <- graph.data.frame(tab, dir=FALSE)

  E(g)$weight <- count.multiple(g)*step # I am now assigning a weight to
                                      # each edge according to 
                                      # how often it gets repeated




  g <- simplify(g)

  return(g)
  
}

epsi <- 0 #tolerance parameter in the logarithmic binning
step <- 100 #we say that every time step is worth 100 in some units
            #(days, minutes, secs, etc...)
scale_min <- 1e-4 #a graphical parameter fixing the bottom of the
                  #logarithmic binning when I cannot naturally introduce a zero
n_bin <- 9 #number of logaritmic bins
lin_bin <- 5 # width of the bins (linear bin structure) 
t_ini <- 0 #time at which I start aggregating the results
t_end <- 50 #time when I stop the aggregation 
my_seed <- 199 #parameter I can choose in order to have reproducible
               #graph layouts


#read the time-dependent edgelist
tab <- read.table("CoSMoGraph.dat", header=FALSE)

# aggregate everything into a network
#this requires the igraph library.

g <- aggregate(tab, t_ini, t_end, step)


#the following bit is only for visualization (plotting the networks)

#chose a layout for the network.
l <- layout.fruchterman.reingold(g)
l <- layout.norm(l, -1,1, -1,1) 


#calculate network diameter (it will be highlighted in the network
                            #visualization)
d <- get.diameter(g,weights=NA)

set.seed(my_seed)

pdf("aggregated-network.pdf")
#main plot
plot(g, layout=l,
     vertex.label.dist=0.5,
     vertex.color="#ff000033",
     vertex.frame.color="#ff000033",
     edge.color="#55555533",
     vertex.label=NA,
     vertex.size=4,
     edge.width=log(1+E(g)$weight/step),
     edge.label=NA    )

#now highlight the network diameter
V(g)$id <- seq(vcount(g))-1
g2 <- subgraph(g, d)
plot(g2, layout=l[ V(g2)$id+1, ],
     vertex.color="red",
     vertex.frame.color="#ff000033",
     edge.color="#555555",
     vertex.label=NA ,
     vertex.size=4,
     add=TRUE,
     rescale=FALSE,
     edge.width=log(1+E(g2)$weight/step))

#Switch off the plotting device.
dev.off()

set.seed(my_seed)

pdf("aggregated-network-no-diam.pdf")
#main plot
plot(g, layout=l,
     vertex.label.dist=0.5,
     vertex.color="#ff000033",
     vertex.frame.color="#ff000033",
     edge.color="#55555533",
     vertex.label=NA,
     vertex.size=4,
     edge.width=log(1+E(g)$weight/step),
     edge.label=NA    )

#Switch off the plotting device.
dev.off()


#At this point we save the aggregated network using two standard
#formats,namely the gml and graphml formats.


fn <- paste("aggregated-network.gml",sep="")

write.graph(g, fn, format="gml" )


fn <- paste("aggregated-network.xml",sep="")

write.graph(g, fn, format="graphml" )


#Now we calculate the list of edge weights and the list of node
#strengths which are stored into two
#files. The list of nodes degrees is also calculated. 

wlist <- E(g)$weight
strength <- graph.strength(g)

k_dist <- degree(g)
k_dist <- as.data.frame(k_dist)
names(k_dist) <- c("k")



#Save the raw quantities into files
write.table(wlist, "weight_list.dat", col.names=FALSE, row.names=FALSE)
write.table(strength, "strength_list.dat", col.names=FALSE, row.names=FALSE)
write.table(k_dist, "degree_list.dat", col.names=FALSE, row.names=FALSE)


#Now bin the two previous sets of data.
hist_data_s <- discrete_binning(strength, step, min(strength),
                                max(strength),
                                epsi,n_bin, scale_min)


hist_data_w <- discrete_binning(wlist, step, min(wlist), max(wlist),
                                epsi,n_bin, scale_min)



#Now I save the binning of the weight and strength distributions to files.

write.table(hist_data_s, "strength_histogram.dat", row.names=FALSE)


write.table(hist_data_w, "weight_histogram.dat", row.names=FALSE)


#In this specific example, the degree distribution is not fat-tailed,
#hence there is no need to resort to a logarithmic binning.


#what follows is the setup of graphical parameters to generate plots of
#the calculated distributions
#it relies on a combination of ggplot2+tikzDevice. The plot object is
#translated into a tex file and then compiled on the fly.
#It allows the correct representation of mathematical formulae,
#superscripts/subscripts exactly like in a tex document.


#Set axis ticks for the log-log plot

mb <- c(seq(10,100, by=10), seq(200,1000, by=100), seq(2000, 1e4, by=1e3),
        seq(2e4, 1e5, by=1e4), seq(2e5, 1e6, by=1e5), seq(2e6, 1e7, by=1e6)    )

my_sel <- seq(1,length(mb), by=9)

my_label2 <- rep("",length(mb))
my_label2[my_sel] <- expression(10^1 ,10^2, 10^3, 10^4, 10^5, 10^6, 10^7)




mby <- c(seq(1e-8,1e-7,by=1e-8),seq(2e-7,1e-6,by=1e-7),seq(2e-6, 1e-5,by=1e-6),
         seq(2e-5,1e-4, by=1e-5),seq(2e-4,1e-3, by=1e-4),
         seq(2e-3, 1e-2, by=1e-3),seq(2e-2, 1e-1, by=1e-2),
         seq(2e-1,1,by=1e-1)    )


my_sel <- seq(1,length(mby), by=9)

my_label2y <- rep("",length(mby))
my_label2y[my_sel] <- expression(10^-8,10^-7,10^-6 ,10^-5, 10^-4, 10^-3,
    10^-2, 10^-1,10^0)



#Now use ggplot2 to generate the plot object
gpl2<- ggplot(data=hist_data_w)+
 #  geom_point(aes(x=flux,y=n_clus))+
  geom_rect( aes(ymin = y_min, ymax= y_max, xmin=x_left,xmax=x_right),
            colour="black", fill="blue")+
  opts( panel.background=theme_rect(fill='white', size=1.5))+
  xlab("$w_{ij}$ [arbitrary units]")+
  ylab("$P(w_{ij})$")+
  scale_x_continuous(trans="log10",limits=c(1e2,1.1e4), breaks=mb,
                    label=my_label2)+
  scale_y_continuous(trans="log10",limits=c(1e-4,1e0), breaks=mby,
                   label=my_label2y)+
    opts(panel.grid.minor = theme_blank())+
  opts(panel.grid.major = theme_blank())+
  opts(axis.ticks = theme_segment(colour = "black", size=1),
       axis.ticks.length = unit(0.15, "cm"))+
  opts(axis.title.x = theme_text(size = 20))+
  opts(axis.title.y = theme_text(size = 20, angle=90))+
  opts(axis.text.x = theme_text(size=15, colour="black",vjust=1))+
  opts(axis.text.y = theme_text(size=15, colour="black", hjust=1))


fn <- paste("p_w_.tex")

#use tikzDevice to generate and the compile the latex file ultimately
#leading to the figures.

tikz(fn, standAlone = TRUE, width=5,height=5)
print(gpl2)
dev.off() 

tools::texi2dvi(fn,pdf=T)




#Same story as above

gpl2<- ggplot(data=hist_data_s)+
  geom_rect( aes(ymin = y_min, ymax= y_max, xmin=x_left,xmax=x_right),
            colour="black", fill="blue")+
  opts( panel.background=theme_rect(fill='white', size=1.5))+
  xlab("$s$ [arbitrary units]")+
  ylab("$P(s)$")+
  scale_x_continuous(trans="log10",limits=c(100,1.1e5), breaks=mb,
                    label=my_label2)+
  scale_y_continuous(trans="log10",limits=c(6e-5,1e-1), breaks=mby,
                   label=my_label2y)+
    opts(panel.grid.minor = theme_blank())+
  opts(panel.grid.major = theme_blank())+
   opts(axis.ticks = theme_segment(colour = "black", size=1),
        axis.ticks.length = unit(0.15, "cm"))+
  opts(axis.title.x = theme_text(size = 20))+
  opts(axis.title.y = theme_text(size = 20, angle=90))+
  opts(axis.text.x = theme_text(size=15, colour="black",vjust=1))+
  opts(axis.text.y = theme_text(size=15, colour="black", hjust=1))


fn <- paste("p_s_.tex")

tikz(fn, standAlone = TRUE, width=5,height=5)
print(gpl2)
dev.off() 

tools::texi2dvi(fn,pdf=T)



#Now I use the geom_histogram of ggplot2 for a "simpler" histogram on a
#linear scale.

gpl2 <- ggplot(k_dist,aes(x=k))+
geom_histogram(aes(y=..count../sum(..count..)),binwidth=lin_bin,
               colour="black", fill="blue",origin=1)+
  opts( panel.background=theme_rect(fill='white', size=1.5))+
  opts(panel.grid.minor = theme_blank())+
  opts(panel.grid.major = theme_blank())+
  scale_x_continuous(limits=c(0,102), breaks=seq(0, 100, by=10))+
  scale_y_continuous(limits=c(0,.4), breaks=seq(0,0.4, by=0.1))+
  opts(axis.ticks = theme_segment(colour = "black", size=1),
       axis.ticks.length = unit(0.15, "cm"))+
  xlab("$k$")+
  ylab("$P(k)$")+
  opts(axis.title.x = theme_text(size = 20))+
  opts(axis.title.y = theme_text(size = 20, angle=90,hjust=0.5))+
  opts(plot.title = theme_text(size = 25))+
  opts(axis.text.x = theme_text(size=18, colour="black",vjust=1))+
  opts(axis.text.y = theme_text(size=18, colour="black", hjust=1))


fn <- paste("p_k_.tex")


tikz(fn, standAlone = TRUE, width=5,height=5)
print(gpl2)
dev.off() 

tools::texi2dvi(fn,pdf=T)





print("So far so good")
myprojects-hg-reborn Fork

Tags

Frequently used words (click to add to your profile)

File Info

Content

myprojects-hg-reborn
Fork