Rev. | 221dfa4b3d3e7d79d49af50aa18e80ca15a6d8d8 |
---|---|
Größe | 10,424 Bytes |
Zeit | 2009-08-31 21:21:14 |
Autor | lorenzo |
Log Message | Minor modifications to a couple of codes. I simply changed the names of the input files they require to work. |
#! /usr/bin/env python
from sociopatterns.loader import Loader
import scipy as s
import numpy as n
import pylab as p
#Some notes on usage: since I did not build the library provided by Ciro,
#I need to copy the whole sociopattern directory in the same folder where
#I have this code.
XXTEA_CRYPTO_KEY = (0xd2e3fd73, 0xfa70ca9c, 0xe2575826, 0x0fe09946)
# loader = Loader(["log_1240843289.pcap"], decode=1,
# xxtea_crypto_key=XXTEA_CRYPTO_KEY, load_contacts=0, unique_contacts=1,
# load_sightings=1)
# def iter_reading(read_contact=1,read_sighting=1,dumpfile,\
# time_begin,time_end, len_data ):
##############################################################
##############################################################
##############################################################
# The aim of this code is to gain info about the duration of the average
# visit in the museum. There are many subtleties, but we will start
# out with a very naive approach: the change in the bootcount number
#means that a visit has ended. The bootcount number is broadcast in the
#sighting protocol. This info needs to be complemented by the ID of the tag
#changing its bootcount and by the times when this happens.
#Now the aim is to extract (timestap, tag_id,bootcount) and store them into
#an array consisting of 3 columns. Rather than attaching the data every time we
#read them, we will allocate the array to start with (otherwise the whole
#process would become slower and slower as time progresses)
#initialize an array I will use to load the data
read=0
data_arr=s.zeros(2).astype("int") #just an initialization, it will be
#overwritten anyhow
if (read==1):
time_begin=1240843289
#delta_time=20000
#time_end=time_begin+delta_time
time_end= 1241181886 #time_begin+3000
#n_row=delta_time
len_data=3613311+10
data_arr=s.zeros((len_data,3)).astype("int")-1 #let us initialize the
#array with an impossible value
print "s.shape(data_arr) is, ", s.shape(data_arr)
loader = Loader(["log_1240843289.pcap"],start_time=time_begin,
stop_time=time_end, decode=1,
xxtea_crypto_key=XXTEA_CRYPTO_KEY, load_contacts=0, unique_contacts=1,
load_sightings=1,unique_sightings=1, sighting_time_delta=10)
i=0
for c in loader:
#print c.t, c.id, c.boot_count
#print "c.protocol is, ", c.protocol
data_arr[i,:]=s.array([c.t,c.id,c.boot_count]).astype("int")
#print "data_arr[i, :] is, ", data_arr[i, :]
if (s.remainder(i,100000)==0):
print "i is, ", i
i+=1
sel=s.where(data_arr[:,0]!=-1)
data_arr=data_arr[sel[0],:]
p.save("data_arr.dat", data_arr , fmt='%d')
else:
data_arr=p.load("boot_count_cut.dat")
data_arr=data_arr.astype("int")
print "I finished reading the data"
print "id_list is, ", s.unique1d(data_arr[:,1])
print "s.shape(data_arr) is, ", s.shape(data_arr)
def time_interval_and_bootcount_single_tag(data_arr,tag_id):
find_id_by_tag=s.where(data_arr[:,1]==tag_id)
#the following may look odd but it retrieves the 1st and
#3rd column (counted from 1, or the oth and 2nd counted from zero)
#of the matrix, that is to day the timestamp and bootcount of the
#sighting reports of a given tag
retrive_time_bootcount=data_arr[find_id_by_tag[0],:][:,0:3:2]
#now calculate an array giving the gaps in the bootcounts and the
#corresponding gaps in the timestamps
p.save("time_and_bootcount.dat",retrive_time_bootcount , fmt='%d')
boot_diff=s.diff(retrive_time_bootcount[:,1])
time_diff=s.diff(retrive_time_bootcount[:,0])
boot_gap_sel=s.where(boot_diff!=0)
time_gaps=time_diff[boot_gap_sel]
return time_gaps
def time_interval_and_bootcount_all_tags(data_arr):
tag_id_list=s.unique1d(data_arr[:,1])
res=s.zeros(0).astype("int")
for track_tag in tag_id_list:
single_tag_gap=time_interval_and_bootcount_single_tag(data_arr\
,track_tag)
res=s.hstack((res,single_tag_gap))
return (res)
def visit_duration_single_tag(data_arr,tag_id):
find_id_by_tag=s.where(data_arr[:,1]==tag_id)
#the following may look odd but it retrieves the 1st and
#3rd column (counted from 1, or the oth and 2nd counted from zero)
#of the matrix, that is to day the timestamp and bootcount of the
#sighting reports of a given tag
retrive_time_bootcount=data_arr[find_id_by_tag[0],:][:,0:3:2]
#now calculate an array giving the gaps in the bootcounts and the
#corresponding gaps in the timestamps
bootcount_list=retrive_time_bootcount[:,1]
bootcount_unique=s.unique1d(bootcount_list)
time_list=retrive_time_bootcount[:,0]
duration_arr=s.zeros(len(bootcount_unique)).astype("int") -1 #it
#will be overwritten
interval_arr=s.zeros(len(bootcount_unique)-1).astype("int") -1 #it
#will be overwritten
#i=0
#for my_bootcount in bootcount_unique:
#sel=s.where(bootcount_list==my_bootcount)
#duration_arr[i]=max(time_list[sel])-min(time_list[sel])
#i+=1
for i in xrange(len(bootcount_unique)):
my_bootcount=bootcount_unique[i]
sel=s.where(bootcount_list==my_bootcount)
duration_arr[i]=max(time_list[sel])-min(time_list[sel])
if (i>0):
lower=bootcount_unique[i-1]
upper=bootcount_unique[i]
sel_lower=s.where(bootcount_list==lower)[0][-1]
sel_upper=s.where(bootcount_list==upper)[0][0]
interval_arr[i-1]=time_list[sel_upper]-time_list[sel_lower]
if (tag_id==503):
p.save("503_duration_arr.dat",duration_arr,fmt='%d' )
p.save("503_bootcount_list.dat",bootcount_list,fmt='%d' )
p.save("503_bootcount_unique.dat",bootcount_unique,fmt='%d' )
#NB: the number of reboots for a tag is given by the number of unique
#bootcounts minus 1 !!!
return [duration_arr, len(bootcount_unique)-1, interval_arr]
def visit_duration_many_tags(data_arr):
tag_id_list=s.unique1d(data_arr[:,1])
p.save("all_visit_tag_ids.dat", tag_id_list,fmt='%d' )
res=s.zeros(0).astype("int")
count_boot=s.zeros(0).astype("int")
interval_between_boots=s.zeros(0).astype("int")
for track_tag in tag_id_list:
duration_and_count=visit_duration_single_tag(data_arr,track_tag)
single_tag_duration=duration_and_count[0]
single_tag_count_boot=duration_and_count[1]
single_tag_boot_interval=duration_and_count[2]
res=s.hstack((res,single_tag_duration))
count_boot=s.hstack((count_boot,single_tag_count_boot))
interval_between_boots=s.hstack((interval_between_boots,\
single_tag_boot_interval))
return [res, count_boot,interval_between_boots]
def contact_duration_and_interval_single_tag(single_tag_no_rep, delta_slice):
#the following if condition is useful only when I am really tracking a particular
#tag whose ID is given a priory but which may not exist at all (in the sense that
#it would not estabilish any contact) in the time window during which I am studying
#the system.
if (single_tag_no_rep==None):
print "The chosen tag does not exist hence no analysis can be performed on it"
return
delta_slice=int(delta_slice) #I do not need floating point arithmetic
single_tag_no_rep=(single_tag_no_rep-single_tag_no_rep[0])/delta_slice
gaps=s.diff(single_tag_no_rep) #a bit more efficient than the line above
#print "gaps is, ", gaps
#gaps is now an array of integers. It either has a list of consecutive 1`s
# (which means a contact duration of delta_slice times the number of consecutive ones)
# of an entry higher than one which expresses (in units of delta_slice) the time during
#which the tag underwent no contact
#p.save("gaps.dat",gaps, fmt='%d')
find_gap=s.where(gaps != 1)
gap_distr=(gaps[find_gap]-1)*delta_slice #so, this is really the list of the
#time interval between two contacts for my tag. After the discussion with Ciro,
#I modified slightly the definition (now there is a -1) in the definition.
#It probably does not matter much for the calculated distribution.
#print "gap_distr is, ", gap_distr
#NB: the procedure above does NOT break down is gap_distr is empty
#Now I calculate the duration of the contacts of my tag. I changed this bit since
#I had new discussions with Ciro
#single_tag_no_rep=s.hstack((0,single_tag_no_rep))
#print "single_tag_no_rep is, ", single_tag_no_rep, "and its length is, ", len(single_tag_no_rep)
e2=s.diff(single_tag_no_rep)
#print "e2 is, ", e2
sel=s.where(e2!=1)[0]
#print "sel is, ", sel
res=0 #this will contain the results and will be overwritten
#What is here needs to be tested very carefully! There may be some bugs
sol=s.hstack((0,sel,len(e2)))
#print "sol is, ", sol
res=s.diff(sol)
#print "res initially is, ", res
res[0]=res[0]+1 #to account for troubles I normally have at the beginning of the array
#print "res is, ", res
res=res*delta_slice
#print "the sum of all the durations is, ", res.sum()
return res,gap_distr
#tag_id=509
# my_gaps=time_interval_and_bootcount_single_tag(data_arr,tag_id)
# p.save("single_tag_gaps.dat", my_gaps , fmt='%d')
# all_gaps=time_interval_and_bootcount_all_tags(data_arr)
# p.save("all_tag_gaps.dat",all_gaps , fmt='%d')
all_duration_and_all_counts=visit_duration_many_tags(data_arr)
all_durations=all_duration_and_all_counts[0]
p.save("all_visit_duration.dat",all_durations , fmt='%d')
all_counts=all_duration_and_all_counts[1]
p.save("all_counts.dat",all_counts , fmt='%d')
all_boot_intervals=all_duration_and_all_counts[2]
p.save("all_boot_intervals.dat",all_boot_intervals , fmt='%d')
fig = p.figure()
axes = fig.gca()
n_bins=50
#print "the length of df_distr is, ", len(df_distr)
axes.hist(all_durations/3600.,bins=n_bins, normed=0)
p.ylabel('frequency')
p.xlabel("Duration (hours)")
p.title('Analysis of log_1240843289.pcap')
cluster_name="histogram_visit_duration_from_bootcount.pdf"
p.savefig(cluster_name)
p.clf()
print "So far so good"