Rev. | 505c60f2d073d3f91241438471cdf057b0cc1845 |
---|---|
Größe | 78,895 Bytes |
Zeit | 2009-07-16 20:16:42 |
Autor | lorenzo |
Log Message | A trivial modification: in each time slice I was saving time starting from
|
#!/usr/bin/env python
import sys, string, time
import pcap
import struct
import xxtea
#I import now some extra modules which come handy for array manipulation/visualization and storage
import scipy as s
import pylab as p
import numpy as n
# server ports
UDP_PORT = 2342

# OpenBeacon low-level protocols (first byte of the decrypted payload)
PROTO_CONTACTREPORT = 69
PROTO_BEACONTRACKER_NEW = 24

# tag ID ranges and ignored tags
TAG_ID_MIN = 500
TAG_ID_MAX = 2047

# crypto key
# (previous deployment key, kept for reference)
#TEA_CRYPTO_KEY = ( 0xd2e3fd73, 0xfa70ca9c, 0xe2575826, 0x0fe09946 )
TEA_CRYPTO_KEY =(0xf6e103d4, 0x77a739f6, 0x65eecead, 0xa40543a9)
# Install the XXTEA key once at import time; every packet payload read by
# process_packet_reduced is decrypted with it.
xxtea.set_key( *TEA_CRYPTO_KEY )
def output_graph(data_arr):
    """Strip the timestamp and hash columns from a raw record array.

    Keeps columns 2..6: the tracked tag's ID plus the IDs of up to four
    tags it interacted with.
    """
    return data_arr[:, 2:7]
def output_graph_improved(data_arr, tag_no_rep_out):
    """Convert the raw records of one time slice into binary (tag-tag) contacts.

    data_arr -- rows of [time, hash, id, id1, id2, id3, id4] with -1 in the
                unused slots (as produced by process_packet_reduced).
    tag_no_rep_out -- if 1, return only the deduplicated list of node pairs;
                otherwise return rows of [time, id_a, id_b].

    A record whose tag saw k other tags (k = 1..4) is expanded into k
    (id, id_k) pairs, and the record's timestamp is repeated k times so the
    time array stays aligned with the pair array.
    """
    temp_t_list=data_arr[:,0] #the time column of the raw data
    time_list=s.zeros(0) #will hold one timestamp per emitted pair
    time_list=time_list.astype("int")
    data_arr=data_arr[:,2:7] #drop time and hash: keep only the tracked tag's
    #ID and the IDs of the tags it may have interacted with
    node_list=s.zeros(2)-1 #initialization with an impossible value
    node_list=node_list.astype("int")
    for i in xrange(len(data_arr)): #one iteration per raw record
        sel=s.where(data_arr[i,:] != -1) #slots where a contact was registered
        contact=data_arr[i, :][sel] #the valid entries: [id, id1, ..., idk]
        #Expand "id id1 id2 ..." into the pairs (id,id1), (id,id2), ...
        #appending the record's timestamp once per pair, e.g.
        #  12 34 44 55 88  ->  (12,34) (12,44) (12,55) (12,88)
        if (len(contact)==2): #i.e. ID + ID1
            node_list=s.vstack((node_list,contact))
            time_list=s.hstack((time_list,temp_t_list[i]))
        elif (len(contact)==3): #ID + 2 partners: add the time twice
            node_list=s.vstack((node_list,contact[0:2]))
            node_list=s.vstack((node_list,contact[0:3:2]))
            time_list=s.hstack((time_list,temp_t_list[i],temp_t_list[i]))
        elif (len(contact)==4): #ID + 3 partners: add the time three times
            time_list=s.hstack((time_list,temp_t_list[i],temp_t_list[i],temp_t_list[i]))
            node_list=s.vstack((node_list,contact[0:2]))
            node_list=s.vstack((node_list,contact[0:4:2]))
            node_list=s.vstack((node_list,contact[0:4:3]))
        elif (len(contact)==5): #i.e. ID + ID1, ID2, ID3, ID4: time four times
            time_list=s.hstack((time_list,temp_t_list[i],temp_t_list[i],temp_t_list[i],temp_t_list[i]))
            node_list=s.vstack((node_list,contact[0:2]))
            node_list=s.vstack((node_list,contact[0:4:2]))
            node_list=s.vstack((node_list,contact[0:4:3]))
            node_list=s.vstack((node_list,contact[0:5:4]))
    node_list=node_list[1:len(node_list),:] #drop the -1 initialization row
    node_list=node_list.astype('int') #convert to integer
    node_list=s.sort(node_list) #row-wise sort so (1,5) and (5,1) coincide
    #binary contacts with their interaction time: rows of [time, id_a, id_b]
    tag_tag_bin=s.transpose(s.vstack((time_list,s.transpose(node_list))))
    node_list_no_rep=order_count(node_list)[0] #drop duplicated pairs
    if (tag_no_rep_out==1):
        #list of social contacts without repetitions, i.e. without
        #double-counting the i->j and j->i interaction
        return node_list_no_rep
    else :
        return tag_tag_bin
def dump_matrix( adj_list,slice_label):
    """Build a 2000x2000 adjacency matrix from a list of binary contacts
    and dump it to disk.

    adj_list -- array whose columns 1 and 2 hold the IDs of the two
                interacting tags (column 0 is the timestamp).
    slice_label -- integer used to label the output file adj_mat%05d.
    Returns 0.
    """
    sel_mat=adj_list[:,1:3]
    sel_mat=sel_mat.astype("int")
    my_mat=s.zeros((2000,2000))
    # BUGFIX: `my_mat[sel_mat]=1` treats the whole (N,2) array as ROW
    # indices and sets entire rows of the matrix to 1. To mark individual
    # (i, j) adjacency entries, the two ID columns must be passed as
    # separate row- and column-index arrays.
    my_mat[sel_mat[:,0], sel_mat[:,1]]=1
    temp_name="adj_mat%05d"%slice_label
    p.save(temp_name,my_mat, fmt='%d' )
    return 0
class Sighting:
    """A single tag sighting reported by a base station."""

    def __init__(self, tstamp, ip, id, seq, strength, flags, last_seen=0, boot_count=0):
        self.t = tstamp
        self.ip = ip
        self.id = id
        self.seq = seq
        self.strength = strength
        self.flags = flags
        self.last_seen = last_seen
        self.boot_count = boot_count

    def __repr__(self):
        fields = (self.t, self.ip, self.id, self.seq,
                  self.strength, self.flags, self.last_seen, self.boot_count)
        return 'S %ld 0x%08x %d 0x%08x %d %d %s %s' % fields
class Contact:
    """A contact report: one tag plus up to four tags it has seen.

    Each raw 16-bit word in id_seen packs the seen tag's ID (low 11 bits),
    a count (next 3 bits) and the radio power (top 2 bits); a zero word
    terminates the list.
    """

    def __init__(self, tstamp, ip, id, seq, id_seen, flags):
        self.t = tstamp
        self.ip = ip
        self.id = id
        self.seq = seq
        self.flags = flags
        self.seen_id = []
        self.seen_pwr = []
        self.seen_cnt = []
        for raw in id_seen:
            if not raw:
                break
            self.seen_id.append(raw & 0x07FF)
            self.seen_pwr.append(raw >> 14)
            self.seen_cnt.append((raw >> 11) & 0x07)

    def get_hash(self):
        """Hash of (tag id, sequence number): identifies this packet."""
        return hash( (self.id, self.seq) )

    def __repr__(self):
        parts = []
        for (id2, pwr, count) in zip(self.seen_id, self.seen_pwr, self.seen_cnt):
            parts.append(" [%d(%d) #%d]" % (id2, pwr, count))
        clist = "".join(parts)
        return 'C %ld 0x%08x %d 0x%08x %d%s' % (self.t, self.ip, self.id, self.seq, self.flags, clist)
#A brief description of the following class: it is a modified (probably not very well written)
#version of the Contact class. It reads the data from a Contact protocol packet and stores
#the relevant info as a numpy array for further manipulation (not performed by this class).
class Contact_reduced:
    """Reduced variant of Contact: parses a contact-report packet and
    condenses it into a 7-entry numpy integer array
    [time, hash((id, seq)), own tag id, up to four contacted tag ids],
    padding unused slots with -1, for array-based post-processing done
    elsewhere."""

    def __init__(self, tstamp, ip, id, seq, id_seen, flags):
        self.t = tstamp
        self.ip = ip
        self.id = id
        self.seq = seq
        self.flags = flags
        self.seen_id = []
        self.seen_pwr = []
        self.seen_cnt = []
        for id2 in id_seen:
            #A zero word marks the end of the seen-tag list. A fundamental
            #point: a tag which reports being on its own simply yields
            #empty seen_* lists, and that case is NOT disregarded by the
            #data_out_* methods below.
            if not id2:
                break
            self.seen_id.append(id2 & 0x07FF) #tag id: low 11 bits
            self.seen_pwr.append(id2 >> 14) #radio power: top 2 bits
            self.seen_cnt.append((id2 >> 11) & 0x07) #count: middle 3 bits

    def data_out_2(self): #older variant, kept for reference
        """Return the 7-entry array described on the class (see data_out_3
        for the current variant). Contacts at high radio power (pwr == 2,
        i.e. far-away tags) are skipped; a tag with no seen tags at all is
        recorded as interacting with itself. Contains noisy debug prints.
        """
        data_arr=s.zeros(7)-1 #initialized with an "impossible" value so we
        #can tell when it has been overwritten with useful information
        clist = ""
        id_list="" #unused, kept as-is
        i=0 #counter giving the write position inside data_arr
        print "self.t is, ", self.t
        for (id2, pwr, count) in zip(self.seen_id, self.seen_pwr, self.seen_cnt):
            print "self.seen_id is, ", self.seen_id
            print "self.seen_pwr is, ", self.seen_pwr
            print "id2 is, ", id2
            print "pwr is, ", pwr
            clist += " [%d(%d) #%d]" % (id2, pwr, count)
            #Record only contacts established at radio power < 2, i.e.
            #between physically close tags.
            if (len(self.seen_id)>0 and pwr<2):
                data_arr[0]=self.t #saves time
                data_arr[1]=hash((self.id, self.seq)) #packet id and seq
                #condensed into one number; NB hash((a,b)) != hash((b,a)),
                #the hash is not symmetric in its arguments
                data_arr[2]=self.id
                data_arr[i+3]=id2 #ID of the contacted tag
            if (data_arr[0]==1243875754): #debug print for one timestamp
                print "data_arr is, ", data_arr
            i=i+1
        if (len(self.seen_id)==0):
            #the tag reports being on its own: record it as a self-contact
            print "self_interaction and self.seen_id is, ", self.seen_id
            data_arr[0]=self.t #save time
            data_arr[1]=hash((self.id, self.seq))
            data_arr[2]=self.id #save the ID of the tag I am tracking
            data_arr[3]=self.id #self-contact: partner is the tag itself
        #convert to integer (no need for double precision: times are
        #discrete and tag labels are integers)
        data_arr=data_arr.astype('int')
        return data_arr

    def data_out_3(self): #current variant used by process_packet_reduced
        """Return the 7-entry array [time, hash((id, seq)), own id,
        contacted ids..., -1 padding].

        Contacts seen only at high radio power (pwr == 2) are discarded as
        interactions between far-away tags; a tag with no remaining
        contacts is recorded as interacting with itself.
        """
        data_arr=s.zeros(7)-1 #initialized with an "impossible" value so we
        #can tell when it has been overwritten with useful information
        id_arr=s.asarray(self.seen_id)
        id_arr=id_arr.astype("int")
        pwr_arr=s.array(self.seen_pwr)
        pwr_arr=pwr_arr.astype("int")
        cnt_arr=s.asarray(self.seen_cnt)
        cnt_arr=cnt_arr.astype("int")
        if (len(id_arr)==0): #the tag reports being alone: self-contact
            data_arr[0]=self.t #save time
            data_arr[1]=hash((self.id, self.seq))
            data_arr[2]=self.id #save the ID of the tag I am tracking
            data_arr[3]=self.id #partner is the tag itself
        if (len(id_arr)>0): #the tag established some contacts, but those
            #which took place at high power must be removed
            test_pwr_arr=s.ones(len(pwr_arr)).astype("int")*2
            #what pwr_arr would look like if ALL contacts were at high power
            if ((pwr_arr==test_pwr_arr).all()): #all contacts at high power:
                #treat the tag as being by itself
                data_arr[0]=self.t #save time
                data_arr[1]=hash((self.id, self.seq))
                data_arr[2]=self.id #save the ID of the tag I am tracking
                data_arr[3]=self.id #partner is the tag itself
            else: #some low-power contacts exist and must be recorded
                sel=s.where(pwr_arr==1) #select the low-power contacts
                #NOTE(review): this keeps only pwr == 1, whereas data_out_2
                #accepted pwr < 2; if pwr == 0 can occur it is silently
                #dropped here -- confirm against the radio protocol.
                id_arr_new=id_arr[sel] #IDs of the corresponding tags
                data_arr[0]=self.t #saves time
                data_arr[1]=hash((self.id, self.seq)) #packet id and seq
                #condensed into one number; NB hash((a,b)) != hash((b,a))
                data_arr[2]=self.id
                data_arr[3:(3+len(id_arr_new))]=id_arr_new
                #IDs of the tags in contact with the one being tracked
        data_arr=data_arr.astype('int')
        return data_arr
def my_hash(arr):
    """Condense the first two entries of arr into a single number via hash()."""
    return hash((arr[0], arr[1]))
def couple_hash_table(sliced_data):
    """Identify each reported binary contact with a single number.

    For every row [time, id1, id2, ...] of sliced_data, the pair (id1, id2)
    is condensed into one hash value, so that repeated interactions of the
    same two tags can later be found by looking for repeated numbers
    instead of repeated couples. Saves and returns a 2-column array:
    [timestamp, couple hash].
    """
    hash_list = s.arange(len(sliced_data))
    for row_idx, row in enumerate(sliced_data):
        hash_list[row_idx] = my_hash(row[1:3])
    hash_and_time = s.transpose(s.vstack((sliced_data[:, 0], hash_list)))
    p.save("hash_and_time.dat", hash_and_time, fmt='%d')
    return hash_and_time
def detect_binary_contacts_among_same_two_tags(hash_and_time, delta_slice):
    """Collect, for every couple of interacting tags, the durations of
    their contacts and the intervals between successive contacts.

    hash_and_time -- 2-column array [timestamp, couple hash] as produced
                     by couple_hash_table.
    delta_slice -- time-slice width; times are measured in these units.
    Saves both distributions to disk and returns the array of durations.
    """
    couple_list_unique=s.unique1d(hash_and_time[:,1]) #the identifier of each
    # couple of interacting tags
    couple_list=hash_and_time[:,1]
    time_arr=hash_and_time[:,0] #important! measured in units of delta_slice
    couple_interaction_duration=s.zeros(0) #durations of the contacts of the
    #SAME i-j tag couple
    couple_interaction_interval=s.zeros(0) #intervals between those contacts
    for chosen_couple in couple_list_unique:
        sel_chosen_couple_times=s.where(couple_list==chosen_couple)
        chosen_couple_times=time_arr[sel_chosen_couple_times]
        # PERF: call the helper ONCE and unpack both results; the original
        # called it twice with identical arguments, doubling the work.
        res=contact_duration_and_interval_single_couple(chosen_couple_times, delta_slice)
        chosen_couple_contact_duration=res[0]
        chosen_couple_contact_interval=res[1]
        couple_interaction_duration=s.hstack((couple_interaction_duration,\
            chosen_couple_contact_duration))
        couple_interaction_interval=s.hstack((couple_interaction_interval,\
            chosen_couple_contact_interval))
    p.save("couple_interaction_duration_times.dat", couple_interaction_duration,fmt='%d')
    p.save("couple_interaction_interval_times.dat", couple_interaction_interval,fmt='%d')
    return couple_interaction_duration
#A new function I added: it is designed to handle the output of the Contact_reduced class.
#It is a downsizing of process_packet: it handles only the case of a Contact protocol
#and retrieves the data output by Contact_reduced.
def process_packet_reduced(pktlen, data, timestamp):
    """Decode one sniffed packet; for contact-report packets return the
    7-entry integer array produced by Contact_reduced.data_out_3:
    [time, hash((id, seq)), own id, up to four contacted ids, -1 padding].

    Packets failing the CRC check yield None; packets of any other
    protocol yield an array of all -1.
    """
    payload = data[42:42 + 16]
    decrypted_data = xxtea.decode(payload)
    # The station id is stored as two big-endian 16-bit halves.
    station_id_low = struct.unpack('!H', data[28:28 + 2])[0]
    station_id_high = struct.unpack('!H', data[30:30 + 2])[0]
    station_id = (station_id_high << 16) | station_id_low
    tstamp = int(timestamp)
    # Initialize the result with the impossible value -1 so a non-contact
    # packet is recognizable by the caller. Integers suffice here: time is
    # discrete and tags are labelled by integer numbers.
    data_array = (s.zeros(7) - 1).astype('int')
    proto = struct.unpack("!B", decrypted_data[0])[0]
    if proto == PROTO_CONTACTREPORT:
        (proto, id, flags, id1, id2, id3, id4,
         seq, crc) = struct.unpack("!BHBHHHHHH", decrypted_data)
        if crc != xxtea.crc16(decrypted_data[:14]):
            return  # corrupted packet: reject silently
        contact = Contact_reduced(tstamp, station_id, id, seq,
                                  [id1, id2, id3, id4], flags)
        # data_out_3 combines packet id and seq into a single integer via
        # hash and filters out high-power (far-away) contacts.
        data_array = contact.data_out_3()
    return data_array.astype("int")
# def process_packet_reduced_improved(pktlen, data, timestamp):
# #print time.ctime(timestamp), pktlen, len(data)
# payload = data[44:44+16]
# decrypted_data = xxtea.decode(payload)
# station_id = struct.unpack('!L', data[28:28+4])[0]
# tstamp = int(timestamp)
# data_array=s.zeros(6)-1 #this array is simply initialized here; it will be overwritten
# # with the results I calculate
# data_array= data_array.astype('int')
# #Better to use integers, since each tag is really labelled
# #by an integer number
# #print "tstamp is, ", tstamp
# proto = struct.unpack("!B", decrypted_data[0])[0]
# if proto == PROTO_CONTACTREPORT:
# (proto, id, flags, id1, id2, id3, id4, seq, crc) = struct.unpack("!BHBHHHHHH", decrypted_data)
# if crc != xxtea.crc16(decrypted_data[:14]):
# print 'rejecting packet from 0x%08x on CRC' % station_id
# return
# contact = Contact_reduced(tstamp, station_id, id, seq, [id1, id2, id3, id4], flags)
# data_array=contact.data_out() #automatically stored as an array of integers
# return data_array
# #The process_packet_reduced function looks into one packet and retrieves the info which
# #ends up into a numpy array. The following function iterates this on a set of times.
# #The info is collected only when a Contact has been detected and the info at different
# #time-steps is stitched together.
# def iter_process_packet_reduced( start, delta, dumpfile):
# P = pcap.pcapObject()
# P.open_offline(dumpfile)
# temp_arr=s.zeros(6)-1 #this array will be used to store the results of the computations
# #and it is also the first row (temporarily) of the array (it will be removed later on
# #in this function)
# temp_arr=temp_arr.astype('int')
# i=0 #initialize counter
# while 1:
# t=P.next() #So I start reading the info for each packet in increasing time
# if (i>start and i<=start+delta): #condition about when to start and
# #finish acquiring the data
# data_array=process_packet_reduced(*t)
# if (data_array!= None): #Important! Otherwise the code would break down
# #when a packet has been rejected and nothing is read into data_array
# if (data_array[0] != -1): #that means: the value I used to initialize
# #the array (in particular time) is -1. If that is still the value,
# #then the array has never been overwritten and no contacts was detected.
# #Hence there is nothing worth saving
# temp_arr=s.vstack((temp_arr,data_array)) #careful! this way
# #temp_arr has an extra row! (the initialized value of temp_arr we spoke
# #of before)
# i=i+1 #increase the counter
# if (i>start+delta):
# print "End of the game"
# break
# temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply a set of -1
# #and was used to initialize the array
# return temp_arr
def iter_process_packet_reduced_improved_new( start, delta, dumpfile, delta_t):
    """Read packets from a pcap dump and collect the unique contact
    reports whose timestamps fall in [start, start + delta].

    A packet is treated as a duplicate (a re-broadcast of an already
    recorded contact) when its hash((id, seq)) is still in the rolling
    list of hashes seen during the last delta_t seconds. The window is
    time-limited because the device recycles the same id/seq every few
    hours, so identical (id, seq) far apart in time are genuinely
    different contacts. Plain arrays are used instead of a set: the
    occasional duplicated element does not justify a different data
    structure, and membership is tested with `in` just the same.

    start -- first timestamp to accept
    delta -- length (seconds) of the reading window
    dumpfile -- pcap capture file, read offline
    delta_t -- width (seconds) of the duplicate-detection window
    Returns an array of 7-column records (see process_packet_reduced).
    """
    end=start+delta #time at which I will stop reading
    my_hash_list=s.zeros(0) #rolling list of hash((id,seq)) of recent packets
    contact_times=s.zeros(0) #their timestamps, aligned entry by entry
    P = pcap.pcapObject()
    P.open_offline(dumpfile)
    temp_arr=s.zeros(7)-1 #accumulator for the results; this first -1 row
    #is removed at the end of this function
    temp_arr=temp_arr.astype('int')
    flag=0 #set once the first in-window packet has been seen
    flag2=0 #set once the first valid contact report has been seen
    while 1:
        t=P.next() #read the next packet (None once the end of the log
        #file is reached, so the code does not crash there)
        if (t!=None):
            data_array=process_packet_reduced(*t)
            if (data_array!= None): #None means the packet was rejected
                #(CRC failure) and nothing was read into data_array
                if ((flag2==0) and (data_array[0]!= -1)):
                    print "the first time a valid contact report is sent is, ", data_array[0]
                    flag2=1
                if ((data_array[0]>=start) and (data_array[0]<=end)):
                    #the timestamp is within the chosen interval; this also
                    #automatically rules out non-contact packets, whose
                    #data_array[0] entry is still -1
                    if (flag == 0):
                        print "time is, ", data_array[0], "and I start reading"
                        flag=1
                    #data_array[1] holds hash((id,seq)), data_array[0] the
                    #timestamp: only genuinely new contacts are recorded
                    if (data_array[1] not in my_hash_list):
                        temp_arr=s.vstack((temp_arr,data_array)) #note the
                        #extra initialization row still on top of temp_arr
                        my_hash_list=s.hstack((my_hash_list,data_array[1]))
                        contact_times=s.hstack((contact_times, data_array[0]))
                        #trim contacts older than delta_t from the rolling
                        #window, together with their hash values
                        time_sel=s.where((data_array[0]-contact_times)<delta_t)
                        contact_times=contact_times[time_sel]
                        my_hash_list=my_hash_list[time_sel]
                elif(data_array[0]>end):
                    print "time is, ", data_array[0], "and I stop reading"
                    break
        elif(t==None):
            print "end of file"
            break
    temp_arr=temp_arr[1:, :] #remove the first row, which was simply the
    #set of -1 used to initialize the array
    return temp_arr
def iter_local( start, delta, dumpfile, delta_t, delta_slice,tag_no_rep_out):
    """Like iter_process_packet_reduced_improved_new, but post-processes
    and saves the contacts one time slice at a time, in a multitude of
    files, instead of keeping expensive global arrays in memory.

    Every time a packet falls past the current slice boundary, the
    contacts accumulated so far are turned into binary contacts
    (output_graph_improved), binned (time_binned_interaction_new_reduced)
    and dumped to sliced_dynamics_self_loops%05d.dat.

    start, delta -- reading window [start, start + delta]
    dumpfile -- pcap capture file, read offline
    delta_t -- duplicate-detection window (seconds); same mechanism as in
               iter_process_packet_reduced_improved_new
    delta_slice -- width (seconds) of one output time slice
    tag_no_rep_out -- passed through to output_graph_improved
    Returns 0 (all results go to disk).
    """
    end=start+delta #time at which I will stop reading
    my_hash_list=s.zeros(0) #rolling list of hash((id,seq)) of recent packets
    contact_times=s.zeros(0) #their timestamps, aligned entry by entry
    P = pcap.pcapObject()
    P.open_offline(dumpfile)
    temp_arr=s.zeros(7)-1 #per-slice accumulator; the -1 initialization row
    #is dropped before each slice is post-processed
    temp_arr=temp_arr.astype('int')
    flag=0 #set once the first in-window packet has been seen
    flag2=0 #set once the first valid contact report has been seen
    low_bin=0 #left edge of the current time slice (the bin structure)
    count_flag=0 #becomes 1 once low_bin has been anchored to start
    time_delta=0 #remnant of a previous slicing scheme; no longer used
    extension=".dat"
    slice_label=0 #running index of the output files
    while 1:
        t=P.next() #read the next packet (None at end of the log file)
        if (t!=None):
            data_array=process_packet_reduced(*t)
            if (data_array!= None): #None means the packet was rejected
                #(CRC failure) and nothing was read into data_array
                if ((flag2==0) and (data_array[0]!= -1)):
                    print "the first time a valid contact report is sent is, ", data_array[0]
                    flag2=1
                if ((data_array[0]>=start) and (data_array[0]<=end)):
                    #in-window; this also rules out non-contact packets,
                    #whose data_array[0] entry is still -1
                    if (flag == 0):
                        print "time is, ", data_array[0], "and I start reading"
                        flag=1
                    #only genuinely new contacts (hash((id,seq)) not seen
                    #within the last delta_t seconds) are recorded
                    if (data_array[1] not in my_hash_list):
                        if (count_flag == 0): #first accepted packet:
                            #anchor the slice grid; done only once, and
                            #here because the time is known to be valid
                            low_bin=start
                            count_flag=1
                        if (data_array[0]-low_bin>=delta_slice):
                            #a full slice has elapsed: flush it to disk
                            #BEFORE recording the current packet, whose
                            #timestamp already belongs to the new slice
                            print "time is, ", data_array[0]
                            temp_arr=temp_arr[1:, :] #drop the -1 init row
                            tag_tag_slice=output_graph_improved(temp_arr, tag_no_rep_out)
                            bin_dyn=time_binned_interaction_new_reduced(tag_tag_slice, delta_slice,\
                            low_bin,start)
                            #advance low_bin to the slice containing the
                            #current packet; the s.fix expression may skip
                            #slices during which no contact was established
                            #(which should be rather rare however)
                            low_bin=s.fix((data_array[0]-start)/delta_slice)*delta_slice+start
                            temp_name="sliced_dynamics_self_loops%05d"%slice_label
                            temp_name=temp_name+extension
                            p.save(temp_name,bin_dyn, fmt='%d')
                            slice_label += 1
                            #re-initialize the accumulator (with its -1
                            #first row) for the new slice
                            temp_arr=s.zeros(7)-1
                            temp_arr=temp_arr.astype('int')
                        temp_arr=s.vstack((temp_arr,data_array)) #record
                        #the contact in the current slice's accumulator
                        my_hash_list=s.hstack((my_hash_list,data_array[1]))
                        contact_times=s.hstack((contact_times, data_array[0]))
                        #trim contacts older than delta_t from the rolling
                        #window, together with their hash values
                        time_sel=s.where((data_array[0]-contact_times)<delta_t)
                        contact_times=contact_times[time_sel]
                        my_hash_list=my_hash_list[time_sel]
                elif(data_array[0]>end):
                    print "time is, ", data_array[0], "and I stop reading"
                    break
        elif(t==None):
            print "end of file"
            break
    #NOTE(review): contacts accumulated after the last completed slice are
    #never flushed to disk -- presumably acceptable for this analysis, but
    #worth confirming.
    return 0
def single_tag_contact_times(sliced_data, tag_id):
    """Return the duplicate-free array of slice times at which `tag_id` had
    at least one contact, or None if the tag never appears in the data.

    `sliced_data` is the output of time_binned_interaction: rows of
    [slice_time, tagA, tagB]. A tag can meet several partners within one
    slice (one row per partner); here that is collapsed to "the tag had a
    contact in this slice", which is all that is needed for the duration /
    interval statistics. Richer per-slice statistics (e.g. the number of
    contacts per slice) could be layered on later if needed.
    """
    hits = s.where(sliced_data == tag_id)
    if len(hits[0]) == 0:
        # The requested ID never shows up. This can only happen when the
        # caller tracks a hand-picked tag; the *_many_tags drivers read IDs
        # straight from the data, so they always find them.
        return
    # Column 0 of every matching row is the slice timestamp.
    raw_times = sliced_data[hits[0], 0]
    # Collapse multiple contacts in the same slice to a single entry.
    return n.unique1d(raw_times)
def contact_duration_and_interval_single_couple(time_list, delta_slice):
    """Return (durations, gaps) for the contact history of one tag pair.

    time_list: 1-D integer array of the slice times at which the couple was
        in contact, or None if the couple never appeared in the data.
    delta_slice: width of one time slice; results are in the input's units.

    durations: length of each uninterrupted run of contact slices (a run of
        k consecutive slices counts as k*delta_slice).
    gaps: idle time between the end of one run and the start of the next
        (a jump of k slices between contacts counts as (k-1)*delta_slice).
    Returns None when time_list is None (non-existing couple).
    """
    # `is None`, not `== None`: comparing an array elementwise to None would
    # yield an array whose truth value is ambiguous.
    if time_list is None:
        print("The chosen couple does not exist hence no analysis can be performed on it")
        return
    delta_slice = int(delta_slice)  # integer arithmetic only
    # Rescale to slice indices with the first contact as origin; floor
    # division keeps the result integral on both Python 2 and 3.
    time_list = (time_list - time_list[0]) // delta_slice
    jumps = s.diff(time_list)
    # jumps is a run of 1's while a contact persists; an entry k > 1 means
    # (k-1) empty slices between two contacts.
    breaks = s.where(jumps != 1)[0]
    gap_distr = (jumps[breaks] - 1) * delta_slice
    # Durations: cut the slice sequence at every non-unit jump; the length
    # of each run of consecutive slices is the contact duration.
    bounds = s.hstack((0, breaks, len(jumps)))
    res = s.diff(bounds)
    # The first run has no leading break, so it is one slice longer than
    # the plain difference suggests.
    res[0] = res[0] + 1
    res = res * delta_slice
    return res, gap_distr
def contact_duration_and_interval_single_tag(single_tag_no_rep, delta_slice):
    """Return (durations, gaps) for the contact history of a single tag.

    single_tag_no_rep: 1-D integer array of the (duplicate-free) slice times
        at which the tag had at least one contact, as produced by
        single_tag_contact_times; may be None for a non-existing tag.
    delta_slice: width of one time slice; results are in the input's units.

    durations: length of each uninterrupted run of contact slices (a run of
        k consecutive slices counts as k*delta_slice).
    gaps: idle time between consecutive runs ((k-1)*delta_slice for a jump
        of k slices between contacts).
    Returns None when single_tag_no_rep is None.
    """
    # `is None`, not `== None`: an elementwise array comparison to None has
    # an ambiguous truth value.
    if single_tag_no_rep is None:
        print("The chosen tag does not exist hence no analysis can be performed on it")
        return
    delta_slice = int(delta_slice)  # integer arithmetic only
    # Rescale to slice indices with the first contact as origin; floor
    # division keeps the result integral on both Python 2 and 3.
    single_tag_no_rep = (single_tag_no_rep - single_tag_no_rep[0]) // delta_slice
    jumps = s.diff(single_tag_no_rep)
    # jumps is a run of 1's while a contact persists; an entry k > 1 means
    # (k-1) empty slices between two contacts.
    breaks = s.where(jumps != 1)[0]
    gap_distr = (jumps[breaks] - 1) * delta_slice
    # Durations: cut the slice sequence at every non-unit jump; the length
    # of each run of consecutive slices is the contact duration.
    bounds = s.hstack((0, breaks, len(jumps)))
    res = s.diff(bounds)
    # The first run has no leading break, so it is one slice longer than
    # the plain difference suggests.
    res[0] = res[0] + 1
    res = res * delta_slice
    return res, gap_distr
def contact_duration_and_interval_many_tags(sliced_interactions, delta_slice):
    """Pool contact durations and inter-contact gaps over every tag.

    Iterates contact_duration_and_interval_single_tag over all tag IDs found
    in the sliced interaction list, concatenates the per-tag results, saves
    them to disk and returns (overall_duration, overall_gaps).
    """
    # Tag IDs live (repeated) in columns 1 and 2 of the sliced data.
    tag_ids = n.unique1d(s.ravel(sliced_interactions[:, 1:3])).astype('int')
    overall_gaps = s.zeros(0)      # inter-contact gaps, pooled over all tags
    overall_duration = s.zeros(0)  # contact durations, pooled over all tags
    for tid in tag_ids:
        # All the slice times at which this tag had a contact...
        times = single_tag_contact_times(sliced_interactions, tid)
        # ...turned into duration / gap distributions for this tag.
        durations, intervals = contact_duration_and_interval_single_tag(times, delta_slice)
        overall_gaps = s.hstack((overall_gaps, intervals))
        overall_duration = s.hstack((overall_duration, durations))
    p.save("many_tags_contact_interval_distr2.dat", overall_gaps , fmt='%d')
    p.save("many_tags_contact_duration_distr2.dat", overall_duration , fmt='%d')
    return overall_duration, overall_gaps
def contact_duration_single_tag(single_tag_no_rep, delta_slice):
    """Contact-duration distribution for a single tag.

    single_tag_no_rep is the duplicate-free array of slice times at which
    the tag had a contact. NOTE(review): the trailing run of contacts is not
    reported by this formulation; contact_duration_and_interval_single_tag
    is the later, revised version.
    """
    step = int(delta_slice)  # integer slice width
    # Rescale times to slice indices (origin at the first contact) and
    # prepend a sentinel 0 so the first run is delimited like the others.
    rescaled = s.hstack((0, (single_tag_no_rep - single_tag_no_rep[0]) / step))
    increments = s.diff(rescaled)
    run_breaks = s.where(increments != 1)[0]
    spans = s.diff(run_breaks)
    # n+1 adjacent slices correspond to a contact spanning n slices,
    # hence the -1 before converting back to time units.
    durations = spans[s.where(spans != 1)] - 1
    return durations * step
def contact_duration_many_tags(sliced_interactions, delta_slice):
    """Pool the single-tag contact-duration distributions over every tag.

    Structurally a clone of interval_between_contacts_many_tags (whence the
    `gaps` naming for what are really durations); saves the pooled array to
    disk and returns it.
    """
    # Tag IDs appear (repeated) in columns 1 and 2 of the sliced data.
    tag_ids = n.unique1d(s.ravel(sliced_interactions[:, 1:3])).astype('int')
    overall_gaps = s.zeros(0)  # pooled durations, one batch per tag
    for tid in tag_ids:
        times = single_tag_contact_times(sliced_interactions, tid)
        per_tag = contact_duration_single_tag(times, delta_slice)
        overall_gaps = s.hstack((overall_gaps, per_tag))
    p.save("many_tags_contact_duration_distr.dat", (overall_gaps) , fmt='%d')
    return overall_gaps
def interval_between_contacts_single_tag(single_tag_no_rep, delta_slice):
    """Distribution of idle time between consecutive contacts of one tag.

    single_tag_no_rep is the duplicate-free array of slice times at which
    the tag had a contact; the returned array holds, in the input's time
    units, the gap separating each pair of consecutive contact runs.
    """
    step = int(delta_slice)  # integer slice width
    # Work in units of the slice width with the first contact as time
    # origin; this is pure convenience and does not affect the statistics.
    slots = (single_tag_no_rep - single_tag_no_rep[0]) / step
    jumps = s.diff(slots)
    # A jump of 1 is an ongoing contact; a jump of k > 1 hides k-1 idle
    # slices, i.e. a gap of (k-1)*step time units.
    idle = jumps[s.where(jumps != 1)] - 1
    return idle * step
def interval_between_contacts_many_tags(sliced_interactions, delta_slice):
    """Pool the inter-contact interval distributions of every tag.

    Iterates interval_between_contacts_single_tag over all tag IDs found in
    the sliced interaction list, saves the pooled result and returns it.
    """
    # Tag IDs appear (repeated) in columns 1 and 2 of the sliced data.
    tag_ids = n.unique1d(s.ravel(sliced_interactions[:, 1:3])).astype('int')
    overall_gaps = s.zeros(0)  # pooled gaps, one batch per tag
    for tid in tag_ids:
        times = single_tag_contact_times(sliced_interactions, tid)
        per_tag = interval_between_contacts_single_tag(times, delta_slice)
        overall_gaps = s.hstack((overall_gaps, per_tag))
    p.save("many_tags_contact_interval_distr.dat", (overall_gaps) , fmt='%d')
    return overall_gaps
# def iter_process_packet_reduced_improved( start, delta, dumpfile):
# P = pcap.pcapObject()
# P.open_offline(dumpfile)
# temp_arr=s.zeros(6)-1 #this array will be used to store the results of the computations
# #and it is also the first row (temporarily) of the array (it will be removed later on
# #in this function)
# temp_arr=temp_arr.astype('int')
# i=0 #initialize counter
# while 1:
# t=P.next() #So I start reading the info for each packet in increasing time
# if (i>start and i<=start+delta): #condition about when to start and
# #finish acquiring the data
# data_array=process_packet_reduced(*t)
# if (data_array!= None): #Important! Otherwise the code would break down
# #when a packet has been rejected and nothing is read into data_array
# if (data_array[0] != -1): #that means: the value I used to initialize
# #the array (in particular time) is -1. If that is still the value,
# #then the array has never been overwritten and no contacts was detected.
# #Hence there is nothing worth saving
# temp_arr=s.vstack((temp_arr,data_array)) #careful! this way
# #temp_arr has an extra row! (the initialized value of temp_arr we spoke
# #of before)
# i=i+1 #increase the counter
# if (i>start+delta):
# print "End of the game"
# break
# temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply a set of -1
# #and was used to initialize the array
# return temp_arr
# def process_packet(pktlen, data, timestamp):
# #print time.ctime(timestamp), pktlen, len(data)
# payload = data[44:44+16]
# decrypted_data = xxtea.decode(payload)
# station_id = struct.unpack('!L', data[28:28+4])[0]
# tstamp = int(timestamp)
# proto = struct.unpack("!B", decrypted_data[0])[0]
# if proto == PROTO_CONTACTREPORT:
# (proto, id, flags, id1, id2, id3, id4, seq, crc) = struct.unpack("!BHBHHHHHH", decrypted_data)
# if crc != xxtea.crc16(decrypted_data[:14]):
# print 'rejecting packet from 0x%08x on CRC' % station_id
# return
# contact = Contact(tstamp, station_id, id, seq, [id1, id2, id3, id4], flags)
# elif proto == PROTO_BEACONTRACKER_NEW:
# (proto, id, flags, strength, id_last_seen, boot_count, reserved, seq, crc) = struct.unpack("!BHBBHHBLH", decrypted_data)
# if crc != xxtea.crc16(decrypted_data[:14]):
# print 'rejecting packet from 0x%08x on CRC' % station_id
# return
# sighting = Sighting(tstamp, station_id, id, seq, strength, flags, last_seen=id_last_seen, boot_count=boot_count)
# print sighting
#The following function is used to keep track of the contacts. Function iter_process_packet_reduced
#returns a list of times and contacts between the tags. However, this is often redundant (i.e. the same
#contact between e.g. tag 1 and 20 could have been recorded simultaneously (same timestamp) by
#different detectors, but the physical event is the same.)
def order_count(A):
    """Return (B, C): the unique rows of A and their occurrence counts.

    A is an iterable of rows (each row convertible to a tuple). B is an
    array of the distinct rows, C the matching counts: B[i] occurred C[i]
    times in A. Row order follows dict iteration order.
    """
    d = {}
    for r in A:
        t = tuple(r)
        d[t] = d.get(t, 0) + 1
    # The dict d now has the counts of the unique rows of A.
    # list(...) keeps this correct on Python 3, where keys()/values() are
    # views (n.array(view) would give a useless 0-d object array), while
    # changing nothing on Python 2.
    B = n.array(list(d.keys()))    # The unique rows of A
    C = n.array(list(d.values()))  # The counts of the unique rows
    return B, C
def time_binned_interaction(tag_tag, delta_slice):
    """Aggregate raw binary contacts into per-slice adjacency lists.

    tag_tag: array with rows [timestamp, tagA, tagB]; by upstream convention
        tagA's ID is lower than tagB's, so every undirected contact has one
        canonical form.
    delta_slice: width of the time bins, in the timestamp's units.

    Returns [slice_lump, slice_lump_no_loops]: rows of [slice_time, tagA,
    tagB], where slice_time labels each bin by its UPPER edge; the second
    array is the first with self-loops (tagA == tagB) removed.
    """
    time_raw=tag_tag[:,0] #timestamps as taken from the dump file (column 0)
    #Bin edges spaced delta_slice apart spanning all recorded times: any
    #contact falling in [t, t+delta_slice) contributes to that bin's
    #adjacency list.
    time_grid=s.arange(min(time_raw),(max(time_raw)+1.), delta_slice)
    slice_lump=s.zeros(3)-1. #sentinel first row of -1's, removed before returning
    tag_tag=tag_tag[:,1:3] #keep only the two tag-ID columns; time stays in time_raw
    print "s.shape(tag_tag is), ", s.shape(tag_tag)
    for i in xrange(len(time_grid)-1):
        #indices of the contacts falling inside the current bin
        sel_slice=s.where((time_raw>=time_grid[i]) & (time_raw<time_grid[i+1]))
        my_slice=tag_tag[sel_slice, :] # binary contacts in this bin
        my_slice=my_slice[0] #fancy indexing with a where-tuple adds an axis;
        #unwrap it to recover a plain 2-column array
        #A bin may contain no contacts at all; skip it, otherwise the
        #stacking below would break.
        if (s.shape(my_slice)[0]>0): #i.e. some contacts have been detected
            #The same pair can interact several times within one bin (with
            #distinct timestamps). A bin is a single "view" of the network,
            #so collapse such duplicates: one row per tag pair per bin.
            my_slice=order_count(my_slice)[0]
            #Label every contact of this bin with the bin's upper edge
            #time_grid[i+1] (a convention that could be changed).
            my_time_slice=s.ones(len(my_slice))*time_grid[i+1]
            #Prepend the time column and append the rows to the running list;
            #over the loop this yields a sequence of adjacency lists in time.
            slice_lump_temp=s.transpose(s.vstack((my_time_slice,s.transpose(my_slice))))
            slice_lump=s.vstack((slice_lump, slice_lump_temp))
    slice_lump=slice_lump[1:len(slice_lump), :] #drop the sentinel row
    #Self-loop removal: keep only rows whose two tag IDs differ.
    sel=s.where(slice_lump[:,1]!=slice_lump[:,2])
    slice_lump_no_loops=slice_lump[sel,:][0] #fancy-index, then unwrap the extra axis
    return [slice_lump, slice_lump_no_loops]
def time_binned_interaction_new(tag_tag, delta_slice, delta_label, start):
    """Stamp one slice worth of binary contacts with a single slice time.

    tag_tag: rows of [timestamp, tagA, tagB] belonging to one time slice
        (tagA's ID lower than tagB's, a convention enforced upstream).
    delta_slice: kept for signature symmetry with the _reduced variant;
        not used in the computation here.
    delta_label, start: their sum is the time label given to the slice.

    Returns [stamped, no_loops]: the de-duplicated contact list labelled
    with the slice time, and the same list with self-loops (tagA == tagB)
    removed.
    """
    slice_time = delta_label + start  # one common time label for the whole slice
    header = s.zeros(3) - 1.  # sentinel row of -1's, dropped before returning
    pairs = tag_tag[:, 1:3]   # keep only the two tag-ID columns
    stamped = header
    if s.shape(pairs)[0] > 0:  # some contacts were detected in this slice
        # The same pair may have interacted several times within the slice;
        # a slice is one network snapshot, so keep one row per tag pair.
        unique_pairs = order_count(pairs)[0]
        labels = s.ones(len(unique_pairs)) * slice_time
        # Prepend the time column to the pair columns.
        block = s.transpose(s.vstack((labels, s.transpose(unique_pairs))))
        stamped = s.vstack((header, block))
    stamped = stamped[1:len(stamped), :]  # strip the sentinel row
    # Self-loop removal: keep only rows whose two tag IDs differ.
    keep = s.where(stamped[:, 1] != stamped[:, 2])
    no_loops = stamped[keep, :][0]  # fancy-index, then unwrap the extra axis
    return [stamped, no_loops]
def time_binned_interaction_new_reduced(tag_tag, delta_slice, delta_label, start):
    """Stamp one slice worth of binary contacts with the slice's upper edge.

    Same role as time_binned_interaction_new, with two differences: the
    slice is labelled with delta_label + delta_slice (the upper bin edge;
    `start` stays in the signature but is unused), and self-loops are NOT
    filtered out.

    tag_tag: rows of [timestamp, tagA, tagB] belonging to one time slice
        (tagA's ID lower than tagB's by upstream convention).
    Returns the de-duplicated [slice_time, tagA, tagB] array for the slice.
    """
    slice_time = delta_label + delta_slice  # upper bin edge as the time label
    header = s.zeros(3) - 1.  # sentinel row of -1's, dropped before returning
    pairs = tag_tag[:, 1:3]   # keep only the two tag-ID columns
    stamped = header
    if s.shape(pairs)[0] > 0:  # some contacts were detected in this slice
        # One row per tag pair per slice, however many times they met.
        unique_pairs = order_count(pairs)[0]
        labels = s.ones(len(unique_pairs)) * slice_time
        # Prepend the time column to the pair columns.
        block = s.transpose(s.vstack((labels, s.transpose(unique_pairs))))
        stamped = s.vstack((header, block))
    stamped = stamped[1:len(stamped), :]  # strip the sentinel row
    return stamped
class OpenBeaconSystem:
    """In-memory store of tag contacts with de-duplication and aging.

    NOTE(review): relies on module-level names defined elsewhere in this
    file (`mylog` writable log object, `DELTA` freshness window in seconds,
    `pickle`) -- confirm they are in scope where this class is used.
    """
    # timestamp of the most recent contact processed; -1 until the first one
    max_time = -1
    def __init__(self):
        self.Contact = []            # contacts accepted since the last cleanup
        self.ContactHashSet = set()  # hashes of seen contacts, for de-duplication
    def process_contact(self, c):
        """Record contact `c` unless an equivalent report was already seen.

        `c` must expose `t` (timestamp) and `get_hash()`; the hash is what
        makes two reports of the same physical contact compare equal.
        """
        self.max_time = c.t
        # discard multiple contact instances reported by different stations (and a 1-second timestamp difference)
        h = c.get_hash()
        if h in self.ContactHashSet:
            return
        self.ContactHashSet.add(h)
        self.Contact.append(c)
        mylog.write('%s\n' % c)
    def cleanup_contacts(self):
        """Keep only contacts younger than DELTA seconds; reset the buffer."""
        self.max_time = int(time.time())
        # wall-clock based freshness test; filter returns a list on Python 2
        self.ContactFresh = filter(lambda c: (self.max_time - c.t) < DELTA, self.Contact)
        self.ContactHashSet = set( [c.get_hash() for c in self.ContactFresh] )
        self.Contact = []
    def get_contacts(self, C):
        """Pickle the contacts in C, NUL-terminated for wire transmission."""
        print ' #contacts:', len(C)
        data = pickle.dumps(
            [ (c.t, c.ip, c.id, c.seq, c.flags, c.seen_id, c.seen_pwr, c.seen_cnt) for c in C]
            ) + '\0'
        return data
# ------------------------------------------------------------------------------------
start=1243839974 # epoch timestamp at which to start reading (alt run: 1240068738)
delta=3600*24 #i.e. I read 24 hours of data
delta_t=5 #window (seconds) within which duplicate contact reports are discarded
time_dep =0 #whether I want to get a dynamic list of the interaction or "freeze"
#everything in a single file
delta_slice=20 #width (seconds) of one aggregation time slice
dumpfile="log_1243839973.pcap" #pcap capture to analyse
track_tag_id=1101 #id of the tag whose interaction I want to track
iter_local( start, delta, dumpfile, delta_t, delta_slice,time_dep)
# test=iter_process_packet_reduced_improved_new( start, delta,dumpfile, delta_t )
# p.save("detection_arr.dat",test, fmt='%d') #to save everything as an array of integers
# my_graph_list=output_graph_improved(test,time_dep)
# p.save("tag-tag-binary-contacts.dat",my_graph_list, fmt='%d' )
# sliced_interactions=time_binned_interaction(my_graph_list, delta_slice)
# p.save("sliced_dynamics_self_loops.dat",sliced_interactions[0], fmt='%d')
# p.save("sliced_dynamics.dat",sliced_interactions[1], fmt='%d')
# ##################################################################################
# ##################################################################################
# ##################################################################################
# ##################################################################################
# #Now some tests to be conducted on a single tag
# #which I commented since I verified that the code passes them.
# #single_tag_dyn=single_tag_contact_times(sliced_interactions, track_tag_id)
# # interval_single=interval_between_contacts_single_tag(single_tag_dyn, delta_slice)
# # p.save("interval_single.dat",interval_single, fmt='%d')
# # duration_single=contact_duration_single_tag(single_tag_dyn, delta_slice)
# # p.save("duration_single.dat",duration_single, fmt='%d')
# #duration_single2=contact_duration_and_interval_single_tag(single_tag_dyn, delta_slice)[0]
# #p.save("duration_single2.dat",duration_single2, fmt='%d')
# #interval_single2=contact_duration_and_interval_single_tag(single_tag_dyn, delta_slice)[1]
# #p.save("interval_single2.dat",interval_single2, fmt='%d')
# # p.save("detection_arr.dat",test, fmt='%d')
# #End of the tests
# ##################################################################################
# ##################################################################################
# ##################################################################################
# ##################################################################################
# # interval_between_contacts_many_tags(sliced_interactions, delta_slice)
# # contact_duration_many_tags(sliced_interactions, delta_slice)
# #the following function now replaces the previous two (tested that I get the same results)
# #I modified the calls to these functions since now sliced_interactions is a list
# contact_duration_and_interval_many_tags(sliced_interactions[0], delta_slice)
# hash_and_time=couple_hash_table(sliced_interactions[0])
# detect_binary_contacts_among_same_two_tags(hash_and_time, delta_slice)
print "So far so good"