File Info

Rev. 505c60f2d073d3f91241438471cdf057b0cc1845
Size 78,895 Bytes
Time 2009-07-16 20:16:42
Author lorenzo
Log Message

A trivial modification: in each time slice I was saving time starting from
twice the initial time. This does not have any effect on the statistics
(I was just rescaling t=0), but was rather odd. Now I am consistent with
what Ciro does.

Content

#!/usr/bin/env python
import sys, string, time
import pcap
import struct
import xxtea

#Now I import some extra modules that come in handy for array manipulation/visualization and storage

import scipy as s
import pylab as p
import numpy as n


# server port
UDP_PORT = 2342

# OpenBeacon low-level protocols
PROTO_CONTACTREPORT = 69
PROTO_BEACONTRACKER_NEW = 24

# tag ID ranges and ignored tags
TAG_ID_MIN = 500
TAG_ID_MAX = 2047

# crypto key
#TEA_CRYPTO_KEY = ( 0xd2e3fd73, 0xfa70ca9c, 0xe2575826, 0x0fe09946 )

TEA_CRYPTO_KEY =(0xf6e103d4, 0x77a739f6, 0x65eecead, 0xa40543a9)

xxtea.set_key( *TEA_CRYPTO_KEY )


def output_graph(data_arr):
    data_arr=data_arr[:,2:7] #keep only the ID columns (the tracked tag and up
    #to 4 partner tags), dropping the timestamp and hash columns
    return data_arr


def output_graph_improved(data_arr, tag_no_rep_out):

    #This function carries out a few tasks: first of all, it reads the output array
    #containing all the contacts that took place in a certain time-slice

    temp_t_list=data_arr[:,0]  #I select the time column of the array containing the raw
    #data

    time_list=s.zeros(0) #this is the array I will use to store the times
    time_list=time_list.astype("int")

    #The idea is the following: if I have

    #timestamp      ID    ID1  ID2  ID3  ID4

    #1234555        128  1457  124  1223  -1

    #this means that tag 128 had 3 interactions at that timestamp. One should bear
    #that in mind: time_list will therefore repeat that timestamp three times, to match
    #the fact that ID appears three times in the expanded record (for that timestamp)

    
    
    data_arr=data_arr[:,2:7] #i.e. get rid of the timestamp and hash columns and keep only the ID
    #of the tag you are tracking and of the other tags it may have interacted with

    node_list=s.zeros(2)-1 #initialization with an impossible value
    node_list=node_list.astype("int")
    
    for i in xrange(len(data_arr)): #len(data_arr) is the number of rows of data_arr
        sel=s.where(data_arr[i,:] != -1) #i.e. wherever in that row a contact has been registered
        #(ID1, ID2, ID3, ID4)
        contact=data_arr[i, :][sel] #Now I extract this datum
        
        #A tag may have interacted with 1 to 4 tags simultaneously

        #a contact like
        # 12  34  44  55  88
        # ID  ID1 ID2 ID3 ID4

        #needs to become
        # 12 34
        # 12 44
        # 12 55
        # 12 88

        #and this is done by the following if conditions
        
        if (len(contact)==2): #i.e. ID + ID1
            node_list=s.vstack((node_list,contact))

            time_list=s.hstack((time_list,temp_t_list[i]))
            
        elif (len(contact)==3):
            node_list=s.vstack((node_list,contact[0:2]))
            node_list=s.vstack((node_list,contact[0:3:2]))

            #2 contacts, add that time twice
            time_list=s.hstack((time_list,temp_t_list[i],temp_t_list[i]))
            #time_list=s.hstack((time_list,temp_t_list[i]))

        elif (len(contact)==4):
            #3 contacts, add that time three times
            time_list=s.hstack((time_list,temp_t_list[i],temp_t_list[i],temp_t_list[i]))
#             time_list=s.hstack((time_list,temp_t_list[i]))
#             time_list=s.hstack((time_list,temp_t_list[i]))


            node_list=s.vstack((node_list,contact[0:2]))
            node_list=s.vstack((node_list,contact[0:4:2]))
            node_list=s.vstack((node_list,contact[0:4:3]))
        elif (len(contact)==5): #i.e. ID +ID1, ID2, ID3, ID4

            #4 contacts, add that time 4 times
            time_list=s.hstack((time_list,temp_t_list[i],temp_t_list[i],temp_t_list[i],temp_t_list[i]))
#             time_list=s.hstack((time_list,temp_t_list[i]))
#             time_list=s.hstack((time_list,temp_t_list[i]))
#             time_list=s.hstack((time_list,temp_t_list[i]))


            node_list=s.vstack((node_list,contact[0:2]))
            node_list=s.vstack((node_list,contact[0:4:2]))
            node_list=s.vstack((node_list,contact[0:4:3]))
            node_list=s.vstack((node_list,contact[0:5:4]))
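
        #The four elif branches above all implement the same star expansion;
        #a more compact equivalent (a sketch, not used here so as to leave the
        #original logic untouched) would be:
        #
        #    for k in xrange(1, len(contact)):
        #        node_list = s.vstack((node_list, contact[[0, k]]))
        #        time_list = s.hstack((time_list, temp_t_list[i]))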
            
            
            
    node_list=node_list[1:len(node_list),:] #to remove the set of -1 that fill the upper row
    #used to initialize node_list
    node_list=node_list.astype('int') #convert to integer

    node_list=s.sort(node_list)  #to make entries like 1 5 and 5 1 look the same

    #Save results as binary contacts with interaction time
    tag_tag_bin=s.transpose(s.vstack((time_list,s.transpose(node_list))))
    
  

    node_list_no_rep=order_count(node_list)[0] #to get rid of duplicated entries



    
    if (tag_no_rep_out==1):
        #p.save("graph_list_no_rep.dat",node_list_no_rep , fmt='%d') #this way I get a list of
        #social contacts without any repetition of overcounting i-->j and j-->i interaction

    
        return node_list_no_rep
    else :
        #p.save("tag-tag-binary-contacts.dat",tag_tag_bin, fmt='%d' )

        
        return tag_tag_bin
        
    
def dump_matrix( adj_list,slice_label):
    #print "adj_list is, ", adj_list

    sel_mat=adj_list[:,1:3]
    sel_mat=sel_mat.astype("int")

    #print "sel_mat is, ", sel_mat
    
    my_mat=s.zeros((2000,2000))
    #adj_list.astype("int")
    my_mat[sel_mat[:,0],sel_mat[:,1]]=1 #mark each (i,j) contact entry; indexing
    #with the two columns separately sets single matrix entries, whereas
    #my_mat[sel_mat]=1 would have overwritten whole rows
    #print "my_mat is, ", my_mat

    temp_name="adj_mat%05d"%slice_label
     
    p.save(temp_name,my_mat, fmt='%d' )

    return 0




class Sighting:
    def __init__(self, tstamp, ip, id, seq, strength, flags, last_seen=0, boot_count=0):
        self.t = tstamp
        self.ip = ip
        self.id = id
        self.seq = seq
        self.strength = strength
        self.flags = flags
        self.last_seen = last_seen
        self.boot_count = boot_count

    def __repr__(self):
        return 'S %ld 0x%08x %d 0x%08x %d %d %s %s' % (self.t, self.ip, self.id, self.seq, self.strength, self.flags, self.last_seen, self.boot_count)

class Contact:
    def __init__(self, tstamp, ip, id, seq, id_seen, flags):
        self.t = tstamp
        self.ip = ip
        self.id = id
        self.seq = seq
        self.flags = flags
        self.seen_id = []
        self.seen_pwr = []
        self.seen_cnt = []

        for id2 in id_seen:
            if not id2:
                break

            self.seen_id.append(id2 & 0x07FF)
            self.seen_pwr.append(id2 >> 14)
            self.seen_cnt.append((id2 >> 11) & 0x07)
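            #bit layout of each 16-bit "seen" entry, as implied by the masks
            #above: bits 0-10 carry the sighted tag ID, bits 11-13 a packet
            #count, and bits 14-15 the radio power level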

    def get_hash(self):
        return hash( (self.id, self.seq) )

    def __repr__(self):
        clist = ""
        for (id2, pwr, count) in zip(self.seen_id, self.seen_pwr, self.seen_cnt):
            clist += " [%d(%d) #%d]" % (id2, pwr, count)
        
        return 'C %ld 0x%08x %d 0x%08x %d%s' % (self.t, self.ip, self.id, self.seq, self.flags, clist)

        #return 'C  0x%08x' %  (self.seq)


#A brief description of the following class: it is a modified (probably not very well written)
#version of the Contact class. It reads the data from a Contact protocol packet and stores
#the relevant info as a numpy array for further manipulation (not performed by this class).


class Contact_reduced:
    def __init__(self, tstamp, ip, id, seq, id_seen, flags):
        self.t = tstamp
        self.ip = ip
        self.id = id
        self.seq = seq
        self.flags = flags
        self.seen_id = []
        self.seen_pwr = []
        self.seen_cnt = []

        for id2 in id_seen:
            #a fundamental modification now: I am not disregarding the case of
            #a tag which reports being on its own.

            
            if not id2:
                break

            self.seen_id.append(id2 & 0x07FF)
            self.seen_pwr.append(id2 >> 14)
            self.seen_cnt.append((id2 >> 11) & 0x07)

        #print "self.seen_id, len(self.seen_id) is, ", self.seen_id, len(self.seen_id)
            
    def data_out_2(self): #This method has been added to the class. It saves 7 overall quantities:
        #time, hash((id,seq)), the id of the tag I am tracking, and the ids of the 4 other tags
        #it may be in contact with.
        #A choice is made: we get rid of all the contacts taking place at high power (pwr=2)
        #since they amount to interactions between far-away tags. The 7 quantities are stored
        #in a numpy array of integers (I see no point in using floating point since time is
        #discrete and the numbers labelling the tags are integers).

        
        data_arr=s.zeros(7)-1 #this is the array that will contain the afore-mentioned data
        #However, I initialize it with an "impossible" value to make sure I know when it is
        #overwritten with useful information
        
        clist = ""
        id_list=""
        
        i=0 #I initialize to zero a counter which saves the info in the "right"
        #position inside data_arr
        
        #for (id2, pwr, count) in zip(self.seen_id, self.seen_pwr, self.seen_cnt):

        print "self.t is, ", self.t
        for (id2, pwr, count) in zip(self.seen_id, self.seen_pwr, self.seen_cnt):

            print "self.seen_id is, ", self.seen_id
            print "self.seen_pwr is, ", self.seen_pwr
            print "id2 is, ", id2
            print "pwr is, ", pwr


            clist += " [%d(%d) #%d]" % (id2, pwr, count)
            
            #print "clist, len(clist) is, ", clist, len(clist)
            
            #In the following I choose when to overwrite the data_arr
            
            #The following condition is twofold: first of all a contact
            #between two tags needs to have been detected (hence the length of clist
            #needs to be >0)
            #and I am also requiring 
            #the connection to take place at radio powers smaller than 2 i.e. the two tags are
            #physically close
            
            #if (len(clist)>0 and self.seen_pwr[i]<2):
            #if (len(self.seen_id)>0 and self.seen_pwr[i]<2):
            if (len(self.seen_id)>0 and pwr<2):
               
            #if (len(self.seen_id)>0 and self.seen_pwr<2):
                
                data_arr[0]=self.t  #saves time
                data_arr[1]=hash((self.id, self.seq)) #using the hash function, I condense the
                #info about the packet id and seq numbers into a single number
                #NB:hash(a,b)!= hash(b,a) i.e. the hash function is not symmetric in its arguments

                data_arr[2]=self.id
                
                data_arr[i+3]=id2  #saves (whenever they exist) the IDs of the tags
                #in contact with the one I am tracking. This entry will be zero if
                #no connection is detected.

                if (data_arr[0]==1243875754):
                    print "data_arr is, ", data_arr

                
                i=i+1

        if (len(self.seen_id)==0):
            #print "self interaction!!"
            print "self_interaction and self.seen_id is, ", self.seen_id

            
            data_arr[0]=self.t  #save time
            data_arr[1]=hash((self.id, self.seq))
            data_arr[2]=self.id #save the ID of the tag I am tracking
            data_arr[3]=self.id  #save (whenever they exist) the IDs of the tags

            #print "data_arr for self-interaction is, ", data_arr.astype('int')
                
        #convert everything to integer (I really have no need for double precision numbers)
        data_arr=data_arr.astype('int')

            
        return data_arr



    def data_out_3(self): #This method has been added to the class. It saves 7 overall quantities:
        #time, hash((id,seq)), the id of the tag I am tracking, and the ids of the 4 other tags
        #it may be in contact with.
        #A choice is made: we get rid of all the contacts taking place at high power (pwr=2)
        #since they amount to interactions between far-away tags. The 7 quantities are stored
        #in a numpy array of integers (I see no point in using floating point since time is
        #discrete and the numbers labelling the tags are integers).

        
        data_arr=s.zeros(7)-1 #this is the array that will contain the afore-mentioned data
        #However, I initialize it with an "impossible" value to make sure I know when it is
        #overwritten with useful information

        #print "self.t is, ", self.t

        id_arr=s.asarray(self.seen_id)
        id_arr=id_arr.astype("int")
        pwr_arr=s.array(self.seen_pwr)
        pwr_arr=pwr_arr.astype("int")
        cnt_arr=s.asarray(self.seen_cnt)
        cnt_arr=cnt_arr.astype("int")

        if (len(id_arr)==0): #this is the case when the tag reports being alone
            
            data_arr[0]=self.t  #save time
            data_arr[1]=hash((self.id, self.seq))
            data_arr[2]=self.id #save the ID of the tag I am tracking
            data_arr[3]=self.id  #save (whenever they exist) the IDs of the tags

        if (len(id_arr)>0): #now the tag has established some contacts, but I need to
            #remove those which took place at high power
            
            test_pwr_arr=s.ones(len(pwr_arr)).astype("int")*2
            #this array corresponds to what I would see if all the contacts took place at
            #high power


        
            if ((pwr_arr==test_pwr_arr).all()): #in this case all the contacts have
                #been established at high power, hence the tag is considered to be by itself
                
                data_arr[0]=self.t  #save time
                data_arr[1]=hash((self.id, self.seq))
                data_arr[2]=self.id #save the ID of the tag I am tracking
                data_arr[3]=self.id  #save (whenever they exist) the IDs of the tags
            else: #now some contacts have taken place at low power and need to be recorded

                sel=s.where(pwr_arr==1) #that is to say, select the contacts which took place
                #at low power
                id_arr_new=id_arr[sel] #and get the IDs of the corresponding tags
                

                data_arr[0]=self.t  #saves time
                data_arr[1]=hash((self.id, self.seq)) #using the hash function, I condense the
                #info about the packet id and seq numbers into a single number
                #NB:hash(a,b)!= hash(b,a) i.e. the hash function is not symmetric in its arguments

                data_arr[2]=self.id
                
                data_arr[3:(3+len(id_arr_new))]=id_arr_new
                #saves (whenever they exist) the IDs of the tags
                #in contact with the one I am tracking. This entry will be zero if
                #no connection is detected.

        data_arr=data_arr.astype('int')

            
        return data_arr
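
    #Example of a returned row (a hand-traced sketch): if tag 128 reports
    #seeing tags 1457 and 124 at low power at time 1234555, data_out_3 returns
    #  [1234555, hash((128, seq)), 128, 1457, 124, -1, -1]
    #with -1 padding the unused contact slots.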





def my_hash(arr):
    my_hash=hash((arr[0], arr[1]))

    return my_hash


def couple_hash_table(sliced_data):
    hash_list=s.arange(len(sliced_data))

    for i in xrange(len(sliced_data)):
        hash_list[i]=my_hash(sliced_data[i,1:3 ])

    
    hash_and_time=s.transpose(s.vstack((sliced_data[:, 0],hash_list)))

    p.save("hash_and_time.dat", hash_and_time,fmt='%d')

    #the aim of this function is to identify each reported binary contact with a single number.
    #then, instead of looking for couples of interacting tags, I will simply look for repeated
    #occurrences of a number

    return hash_and_time
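
#Example (a sketch): two contacts between tags 5 and 9, one at t=10 and one at
#t=20, both map to the single number hash((5, 9)), so repeated meetings of the
#same couple show up as repeated occurrences of that number in
#hash_and_time[:,1].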

def detect_binary_contacts_among_same_two_tags(hash_and_time, delta_slice):

    couple_list_unique=s.unique1d(hash_and_time[:,1]) #then I have the identifier of each
    # couple of interacting tags
    couple_list=hash_and_time[:,1]

    #print "couple_list_unique is, ", couple_list_unique

    time_arr=hash_and_time[:,0] #important! Measure everything in units
    #of delta_slice
    
    couple_interaction_duration=s.zeros(0) #this array will store the times at which the SAME
    #i-j tags interact.

    couple_interaction_interval=s.zeros(0)

    for i in xrange(len(couple_list_unique)):
        chosen_couple=couple_list_unique[i]
        sel_chosen_couple_times=s.where(couple_list==chosen_couple)
        chosen_couple_times=time_arr[sel_chosen_couple_times]
        #print "i is, ", i
        #print "chosen_couple_times is, ", chosen_couple_times
        #compute durations and intervals with a single call instead of two
        results=contact_duration_and_interval_single_couple(chosen_couple_times, delta_slice)
        chosen_couple_contact_duration=results[0]
        chosen_couple_contact_interval=results[1]

        couple_interaction_duration=s.hstack((couple_interaction_duration,\
                                              chosen_couple_contact_duration))

        couple_interaction_interval=s.hstack((couple_interaction_interval,\
                                              chosen_couple_contact_interval))


    p.save("couple_interaction_duration_times.dat", couple_interaction_duration,fmt='%d')

    p.save("couple_interaction_interval_times.dat", couple_interaction_interval,fmt='%d')

    
    return couple_interaction_duration
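
#Example usage (a sketch): with delta_slice=20,
#   ht = couple_hash_table(sliced_data)
#   durations = detect_binary_contacts_among_same_two_tags(ht, 20)
#writes the per-couple contact duration and interval distributions to the two
#.dat files above and returns the duration array.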

#A new function I added: it is a downsizing of process_packet designed to work with
#the Contact_reduced class. It handles only the case of a Contact protocol
#and retrieves the data output by Contact_reduced

def process_packet_reduced(pktlen, data, timestamp):
    #print time.ctime(timestamp), pktlen, len(data)
    #payload = data[44:44+16]
    payload = data[42:42+16]
    decrypted_data = xxtea.decode(payload)
    #station_id = struct.unpack('!L', data[28:28+4])[0]

    station_id_low = struct.unpack('!H', data[28:28+2])[0]
    station_id_high = struct.unpack('!H', data[30:30+2])[0]
    station_id = (station_id_high << 16) | station_id_low 
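    #the station ID arrives as two big-endian 16-bit words with the low word
    #first in the byte stream; the two halves are recombined into a single
    #32-bit value here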
    
    tstamp = int(timestamp)

    data_array=s.zeros(7)-1 #this array is simply initialized here; it will be overwritten
    # with the results I calculate. Even the length of this array does not matter as long
    #as it exists outside the loop

    data_array=data_array.astype('int')
    #Better to use integers, since each tag is really labelled
    #by an integer number

    #print "tstamp is, ", tstamp
    proto = struct.unpack("!B", decrypted_data[0])[0]

    if proto == PROTO_CONTACTREPORT:
        (proto, id, flags, id1, id2, id3, id4, seq, crc) = struct.unpack("!BHBHHHHHH", decrypted_data)
        
        if crc != xxtea.crc16(decrypted_data[:14]):
            #print 'rejecting packet from 0x%08x on CRC' % station_id
            return

        contact = Contact_reduced(tstamp, station_id, id, seq, [id1, id2, id3, id4], flags)
        #data_array=contact.data_out() #automatically stored as an array of integers
        
        #data_array=contact.data_out_2() #automatically stored as an array of integers
        #but it combines packet ID and seq into a single integer using the hash table


        data_array=contact.data_out_3() #automatically stored as an array of integers
        #but it combines packet ID and seq into a single integer using the hash table

        #print "Now data_array is, ", data_array.astype("int")

    return data_array.astype("int")




# def process_packet_reduced_improved(pktlen, data, timestamp):
#     #print time.ctime(timestamp), pktlen, len(data)
#     payload = data[44:44+16]
#     decrypted_data = xxtea.decode(payload)
#     station_id = struct.unpack('!L', data[28:28+4])[0]
#     tstamp = int(timestamp)

#     data_array=s.zeros(6)-1 #this array is simply initialized here; it will be overwritten
#     # with the results I calculate

#     data_array= data_array.astype('int')
#     #Better to use integers, since each tag is really labelled
#     #by an integer number

#     #print "tstamp is, ", tstamp
#     proto = struct.unpack("!B", decrypted_data[0])[0]

#     if proto == PROTO_CONTACTREPORT:
#         (proto, id, flags, id1, id2, id3, id4, seq, crc) = struct.unpack("!BHBHHHHHH", decrypted_data)
        
#         if crc != xxtea.crc16(decrypted_data[:14]):
#             print 'rejecting packet from 0x%08x on CRC' % station_id
#             return

#         contact = Contact_reduced(tstamp, station_id, id, seq, [id1, id2, id3, id4], flags)
#         data_array=contact.data_out() #automatically stored as an array of integers

#     return data_array






# #The process_packet_reduced function looks into one packet and retrieves the info which
# #ends up into a numpy array. The following function iterates this on a set of times.
# #The info is collected only when a Contact has been detected and the info at different
# #time-steps is stitched together.


# def iter_process_packet_reduced( start, delta, dumpfile):
    
#     P = pcap.pcapObject()

#     P.open_offline(dumpfile)


#     temp_arr=s.zeros(6)-1 #this array will be used to store the results of the computations
#     #and it is also the first row (temporarily) of the array (it will be removed later on
#     #in this function)
    
#     temp_arr=temp_arr.astype('int')

#     i=0  #initialize counter

#     while 1:

#         t=P.next() #So I start reading the info for each packet in increasing time
    
#         if (i>start and i<=start+delta): #condition about when to start and
#             #finish acquiring the data
#             data_array=process_packet_reduced(*t)

#             if (data_array!= None): #Important! Otherwise the code would break down
#                 #when a packet has been rejected and nothing is read into data_array
            
#                 if (data_array[0] != -1): #that means: the value I used to initialize
#                     #the array (in particular time) is -1. If that is still the value,
#                     #then the array has never been overwritten and no contacts was detected.
#                     #Hence there is nothing worth saving
                
#                     temp_arr=s.vstack((temp_arr,data_array)) #careful! this way
#                     #temp_arr has an extra row! (the initialized value of temp_arr we spoke
#                     #of before)
            
#         i=i+1 #increase the counter

#         if (i>start+delta):

#             print "End of the game"

#             break
        
#     temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply a set of -1
#     #and was used to initialize the array
    
#     return temp_arr

def iter_process_packet_reduced_improved_new( start, delta, dumpfile, delta_t):

    #I need to introduce some extra checks to determine whether a packet really represents
    # a new event or not. In general, there are three things I should take into account:
    #1)-2) whether the packets have the same id (of the examined tags) and sequence numbers
    #(of the broadcast packet)
    #3) whether the packets have a similar time-stamp. Due to delays in the broadcast,
    # two packets emitted with a slight (say, 1 sec) time-gap still represent the
    #same contact, if they have the same id and seq. However, I cannot in general
    #say that two packets are the same if the ID and seq are identical, since the device
    #recycles the same id every few hours. Hence the idea is to have a continuously updated list with
    #the hash values of the latest (say, last 3 seconds) recorded packets. Then I just see if the hash
    #of my newly-detected packet is there or not. If it is, then it is an already detected contact and
    #I will not add it to my contact list; if it is not, then I will record such an event.
    #I also need to trim and update my list as time progresses.


    #I left the comments, but I stopped using my_hash_set. The reason is that, pretty much the
    #same way, I can check whether a certain entry does or does not belong to an array.
    #The repetition of a few elements in an array does not justify using an altogether different
    #data structure

    #my_hash_set=set() #I save the result of hash((id,seq)) into a set
    #This is the right data type since it does not change if another existing
    #element is added and it is easy to check if a certain value is already
    #in the set.
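
    #The de-duplication scheme in compact form (a sketch with hypothetical
    #names, mirroring the loop below):
    #
    #   if pkt_hash not in recent_hashes:        #genuinely new contact
    #       record(packet)
    #   recent_hashes = s.hstack((recent_hashes, pkt_hash))
    #   recent_times  = s.hstack((recent_times, pkt_time))
    #   keep = s.where((pkt_time - recent_times) < delta_t) #trim old entries
    #   recent_times  = recent_times[keep]
    #   recent_hashes = recent_hashes[keep]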


    end=start+delta #time at which I will stop reading
    
    my_hash_list=s.zeros(0)  #This scipy array will be used to store the hashes of the
    #detected packets as time goes by. Unlike the set defined above, this does not prevent
    #any repetition of the detected (or rather calculated) hash values

    
    contact_times=s.zeros(0) #in a similar fashion as I did above, this array will be used to
    #store the timestamps of the detected packets

    #I am using the two previous arrays simply as easy-to-manipulate lists

    

    #delta_t is the admissible time-delay to consider two timestamps as close enough
    #to be potentially identical

    #delta_t=5
    
    P = pcap.pcapObject()

    P.open_offline(dumpfile)


    temp_arr=s.zeros(7)-1 #this array will be used to store the results of the computations
    #and it is also the first row (temporarily) of the array (it will be removed later on
    #in this function)
    
    temp_arr=temp_arr.astype('int')

    flag=0

    flag2=0
    

    while 1:

        

        t=P.next() #So I start reading the info for each packet in increasing time

        if (t is not None): #a condition so that the code does not crash if the end of
            #the log file is reached while reading

            data_array=process_packet_reduced(*t)

            if (data_array is not None): #Important! Otherwise the code would break down
                #when a packet has been rejected and nothing is read into data_array

                #The previous if condition says that something should be done only if
                #at least a contact protocol has been detected 

                #print out the first time a contact is detected

                if ((flag2==0) and (data_array[0]!= -1)):
                    print "the first time a valid contact report is sent is, ", data_array[0]
                    flag2=1


                if ((data_array[0]>=start) and (data_array[0]<=end)): #that means: if the time I am
                    #reading is within the interval I have chosen for packet analysis. This
                    #automatically rules out the case of non-detection of a contact, in which case
                    #the entry data_array[0] would be -1

                    #print "time is, ", data_array[0]-start

                    if (flag == 0):

                        print "time is, ", data_array[0], "and I start reading"

                        flag=1


                    #Now an important point: I test directly now whether the newly-read
                    #packet really represents a new interaction or it is another already accounted
                    #for but repeated twice

                    #NB: data_array[1] contains hash((id,seq))
                    #whereas data_array[0] contains the timestamp of the detected packet

                    #I only left the comments. Now I simply check whether the hash((id,seq))
                    #is in the array that contains them. Since I discard it if it is already
                    #there, then I do not have any problem of multiple occurrence.

                    #if (data_array[1] not in my_hash_set): #that is to say: the hash((id,seq))
                        #of the packet should not have already been previously recorded.
                        #this is certainly true at least to start from, when my_hash_set
                        #is empty


                    if (data_array[1] not in my_hash_list): #that is to say: the hash((id,seq))
                        #of the packet should not have already been previously recorded.
                        #this is certainly true at least to start from, when my_hash_list
                        #is empty


                        temp_arr=s.vstack((temp_arr,data_array)) #careful! this way
                        #temp_arr has an extra row! (the initialized value of temp_arr we spoke
                        #of before)

                        #temp_arr contains the info about the recorded contacts

                    #my_hash_set.add(data_array[1]) #this is a set: no entry is ever duplicated
                    #This way I add the hash value of the latest packet I have read to the
                    #set of hash values of the detected contacts


                    my_hash_list=s.hstack((my_hash_list,data_array[1])) #Now I save
                    #the same info as above into an array. Here entries can be duplicated without
                    #problems.


                    contact_times=s.hstack((contact_times, data_array[0])) #In a similar fashion
                    #as above, I save the timestamps of the detected contacts allowing for
                    #repetitions.


                    #Now I remove from my list the contact times which are too old

                    #First I select the contacts which took place less than delta_t ago

                    time_sel=s.where((data_array[0]-contact_times)<delta_t)
                    #Then I keep only those recent contacts
                    contact_times=contact_times[time_sel]
                    #and the corresponding hash values
                    my_hash_list=my_hash_list[time_sel]
                    #and I save again as a set the array with the updated hash values
                    #my_hash_set=set(my_hash_list)

                elif(data_array[0]>end):
                    print "time is, ", data_array[0], "and I stop reading"
                    break
        elif(t is None):
            print "end of file"
            break

                    
        

        
    temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply a set of -1
    #and was used to initialize the array

    
    
    return temp_arr





def iter_local( start, delta, dumpfile, delta_t, delta_slice,tag_no_rep_out):
    #This is a modification of iter_process_packet_reduced_improved_new. It saves the
    #results more often, in a multitude of files, processing them as it goes instead of
    #keeping expensive global quantities in memory.
    

    #I need to introduce some extra checks to determine whether a packet really represents
    # a new event or not. In general, there are three things I should take into account:
    #1)-2) whether the packets have the same id (of the examined tags) and sequence numbers
    #(of the broadcast packet)
    #3) whether the packets have a similar time-stamp. Due to delays in the broadcast,
    # two packets emitted with a slight (say, 1 sec) time-gap still represent the
    #same contact, if they have the same id and seq. However, I cannot in general
    #say that two packets are the same if the ID and seq are identical, since the device
    #recycles the same id every few hours. Hence the idea is to have a continuously updated list with
    #the hash values of the latest (say, last 3 seconds) recorded packets. Then I just see if the hash
    #of my newly-detected packet is there or not. If it is, then it is an already detected contact and
    #I will not add it to my contact list; if it is not, then I will record such an event.
    #I also need to trim and update my list as time progresses.


    #I left the comments, but I stopped using my_hash_set. The reason is that, pretty much the
    #same way, I can check whether a certain entry does or does not belong to an array.
    #The repetition of a few elements in an array does not justify using an altogether different
    #data structure

    #my_hash_set=set() #I save the result of hash((id,seq)) into a set
    #This is the right data type since it does not change if another existing
    #element is added and it is easy to check if a certain value is already
    #in the set.


    end=start+delta #time at which I will stop reading
    
    my_hash_list=s.zeros(0)  #This scipy array will be used to store the hashes of the
    #detected packets as time goes by. Unlike the set defined above, this does not prevent
    #any repetition of the detected (or rather calculated) hash values

    
    contact_times=s.zeros(0) #in a similar fashion as I did above, this array will be used to
    #store the timestamps of the detected packets

    #I am using the two previous arrays simply as easy-to-manipulate lists

    

    #delta_t is the admissible time-delay to consider two timestamps as close enough
    #to be potentially identical

    #delta_t=5
    
    P = pcap.pcapObject()

    P.open_offline(dumpfile)


    temp_arr=s.zeros(7)-1 #this array will be used to store the results of the computations
    #and it is also the first row (temporarily) of the array (it will be removed later on
    #in this function)
    
    temp_arr=temp_arr.astype('int')

    flag=0

    flag2=0

    low_bin=0 #what I will use to keep track of the bin structure

    count_flag=0 #this flag tells me whether I finished reading a timeslice.
    time_delta=0 #this will tell me the time progressed after reading the previous slice

    extension=".dat"

    slice_label=0
    
    while 1:

        

        t=P.next() #So I start reading the info for each packet in increasing time

        if (t is not None): #a condition so that the code does not crash if the end of
            #the log file is reached while reading

            data_array=process_packet_reduced(*t)

            if (data_array is not None): #Important! Otherwise the code would break down
                #when a packet has been rejected and nothing is read into data_array

                #The previous if condition says that something should be done only if
                #at least a contact protocol has been detected 

                #print out the first time a contact is detected

                if ((flag2==0) and (data_array[0]!= -1)):
                    print "the first time a valid contact report is sent is, ", data_array[0]
                    flag2=1


                if ((data_array[0]>=start) and (data_array[0]<=end)): #that means: if the time I am
                    #reading is within the interval I have chosen for packet analysis. This
                    #automatically rules out the case of non-detection of a contact, in which case
                    #the entry data_array[0] would be -1

                    #print "time is, ", data_array[0]-start


                    

                    if (flag == 0):

                        print "time is, ", data_array[0], "and I start reading"

                        flag=1


                    #Now an important point: I test directly now whether the newly-read
                    #packet really represents a new interaction or it is another already accounted
                    #for but repeated twice

                    #NB: data_array[1] contains hash((id,seq))
                    #whereas data_array[0] contains the timestamp of the detected packet

                    #I only left the comments. Now I simply check whether the hash((id,seq))
                    #is in the array that contains them. Since I discard it if it is already
                    #there, then I do not have any problem of multiple occurrence.

                    #if (data_array[1] not in my_hash_set): #that is to say: the hash((id,seq))
                        #of the packet should not have already been previously recorded.
                        #this is certainly true at least to start from, when my_hash_set
                        #is empty


                    if (data_array[1] not in my_hash_list): #that is to say: the hash((id,seq))
                        #of the packet should not have already been previously recorded.
                        #this is certainly true at least to start from, when my_hash_list
                        #is empty



                        #Here I need a condition: before appending the new data to the array,
                        #I can tell that I am in a new time-slice, since I already know whether
                        #the new time is acceptable or not.

                        if (count_flag == 0): #the condition goes here since I need to make sure that
                            #the time is perfectly acceptable
                            #time_delta = data_array[0] #time from beginning of the slice
                            low_bin=start #this is done only once
                            count_flag=1 #important in order to go through this only once

                        #if (data_array[0]-time_delta>=delta_slice):
                        if (data_array[0]-low_bin>=delta_slice): #this means I went through
                            #a time slice and it is time to save the contacts

                            #delta_label=s.fix((data_array[0]-start)/delta_slice)*delta_slice

                            #delta_label=low_bin

                            #I use delta_label to avoid trouble in case I have a time slice during
                            #which no contacts are established (which should be rather rare, however)

                            #print "delta_label is, ", delta_label

                            #time_delta=data_array[0] #update time at the beginning of a slice
                            #here is the mistake! This time is already in the new slice!!!

                            print "time is, ", data_array[0]

                            temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply
                            #a set of -1 and was used to initialize the array

                            #print "temp_arr is, ", temp_arr
                            #print "s.shape(temp_arr) is, ", s.shape(temp_arr) 

                            tag_tag_slice=output_graph_improved(temp_arr, tag_no_rep_out)

                            #if (slice_label == 1785):
                                #p.save("slice_1785.dat",tag_tag_slice, fmt='%d')
                                #p.save("temp_arr_1785.dat", temp_arr, fmt='%d' )

                            #if (slice_label == 1786):
                                #p.save("slice_1786.dat",tag_tag_slice, fmt='%d')
                                #p.save("temp_arr_1786.dat", temp_arr, fmt='%d' )


                            #if (slice_label == 1640):
                                #p.save("slice_1640.dat",tag_tag_slice, fmt='%d')
                                #p.save("temp_arr_1640.dat", temp_arr, fmt='%d' )


                            #if (slice_label == 200):
                                #p.save("slice_200.dat",tag_tag_slice, fmt='%d')
                                #p.save("temp_arr_200.dat", temp_arr, fmt='%d' )

                            #if (slice_label == 201):
                                #p.save("slice_201.dat",tag_tag_slice, fmt='%d')
                                #p.save("temp_arr_201.dat", temp_arr, fmt='%d' )



                            #print "tag_tag_slice is, ", tag_tag_slice
                            #print "s.shape(tag_tag_slice) is, ", s.shape(tag_tag_slice)
                            #bin_dyn=time_binned_interaction_new(tag_tag_slice, delta_slice,\
                            #delta_label,start)


                            #bin_dyn=time_binned_interaction_new_reduced(tag_tag_slice, delta_slice,\
                            #                                   delta_label,start)

                            bin_dyn=time_binned_interaction_new_reduced(tag_tag_slice, delta_slice,\
                                                                low_bin,start)

                            #print "slice_label is, ", slice_label

                            #Now I need to update the value of low_bin using only
                            #the local (current packet) time
                            
                            low_bin=s.fix((data_array[0]-start)/delta_slice)*delta_slice+start
                        
                            temp_name="sliced_dynamics_self_loops%05d"%slice_label
                            temp_name=temp_name+extension

                            p.save(temp_name,bin_dyn, fmt='%d')

                            #dump_matrix(bin_dyn[0] ,slice_label)

                            
                            #temp_name="sliced_dynamics%05d"%slice_label
                            #temp_name=temp_name+extension

                            #p.save(temp_name,bin_dyn[1], fmt='%d')

                            

                            slice_label += 1




                            #now re-initialize temp_arr
                                
                            temp_arr=s.zeros(7)-1 #this array will be used to store the results
                            #of the computations
                            #and it is also the first row (temporarily) of the array
                            #(it will be removed later on in this function)
    
                            temp_arr=temp_arr.astype('int')
                                

                        


                        temp_arr=s.vstack((temp_arr,data_array)) #careful! this way
                        #temp_arr has an extra row! (the initialized value of temp_arr we spoke
                        #of before)

                        #temp_arr contains the info about the recorded contacts

                    #my_hash_set.add(data_array[1]) #this is a set: no entry is ever duplicated
                    #This way I add the hash value of the latest packet I have read to the
                    #set of hash values of the detected contacts


                    my_hash_list=s.hstack((my_hash_list,data_array[1])) #Now I save
                    #the same info as above into an array. Here entries can be duplicated without
                    #problems.


                    contact_times=s.hstack((contact_times, data_array[0])) #In a similar fashion
                    #as above, I save the timestamps of the detected contacts allowing for
                    #repetitions.


                    #Now I remove from my list the contact times which are too old

                    #First I select the contacts which took place less than delta_t ago

                    time_sel=s.where((data_array[0]-contact_times)<delta_t)
                    #Then I keep only those recent contacts
                    contact_times=contact_times[time_sel]
                    #and the corresponding hash values
                    my_hash_list=my_hash_list[time_sel]
                    #and I save again as a set the array with the updated hash values
                    #my_hash_set=set(my_hash_list)

                elif(data_array[0]>end):
                    print "time is, ", data_array[0], "and I stop reading"
                    break
        elif(t is None):
            print "end of file"
            break

                    
        

        
    #temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply a set of -1
    #and was used to initialize the array

    
    
    return 0









def single_tag_contact_times(sliced_data, tag_id):

    #The idea is simple: thanks to the output of time_binned_interaction
    #I have a list of binary interactions in time intervals delta_slice
    #along the whole set of recorded times.
    #This means I can follow a given tag by selecting his ID number and looking
    #for it through the data 
    
    sel=s.where(sliced_data==tag_id)

    #now I need to add a condition in case the id of the tag I have chosen is non-existent
    #NB: this is not a problem when I evaluate the global quantities (e.g. I consider the
    #contacts between all tags) since in that case I read the IDs of the existing tags from
    #an array. The problem is when I run a test and track a tag which I do not know a priori
    #to exist in the data at all
    
    if (len(sel[0])==0):
        #print "the chosen tag does not exist"
        return
    
    tag_contact_times=sliced_data[sel[0],0] #I select the times at which
    #the tag I am tracking undergoes a collision.

    #p.save("single_tag_contact_times.dat",tag_contact_times,fmt='%d')

    tag_no_rep=n.unique1d(tag_contact_times) #The idea is the following:
    #in a given time interval delta_slice, a tag may undergo multiple contacts
    #with different tags. This corresponds to different entries in the output
    #of time_binned_interaction. That function does not allow multiple contacts between
    #the SAME two tags to be reported in the same time slice, but it allows the same tag ID
    #to appear twice in the same time window if it gets in touch with TWO different tags
    #within the same delta_slice. It is fundamental to know that in a given time slice
    #tag A has established contact with tag B and tag C (if I discard any bit of this info,
    #then I lose info about the state of the network at that time slice), but when it comes to
    #simply having the time-dependent distribution of contact durations and intervals between
    #any two contacts established by tag A, I will simply say that tag A reported a contact
    #in that given time-slice. More sophisticated statistics (e.g. the number of contacts
    #established by tag A in a given time slice) can be implemented if found useful/needed
    #later on.



    #p.save("single_tag_contact_times_no_rep.dat",tag_no_rep,fmt='%d')

    return  tag_no_rep
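
#Example (a hand-traced sketch): if tag 42 appears in rows of sliced_data with
#timestamps 100, 100 and 130 (two partners in one slice, then another contact
#in a later slice), the function returns [100, 130]: the duplicated slice time
#is removed by unique1d.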



def contact_duration_and_interval_single_couple(time_list, delta_slice):
    #this function is modelled on contact_duration_and_interval_single_tag;
    #only the initial array is different
    if (time_list is None):
        print "The chosen couple does not exist hence no analysis can be performed on it"
        return
    
    delta_slice=int(delta_slice) #I do not need floating point arithmetic

    time_list=(time_list-time_list[0])/delta_slice
    gaps=s.diff(time_list) #a bit more efficient than an explicit shifted-slice subtraction

    #print "gaps is, ", gaps

    #gaps is now an array of integers. It either has a list of consecutive 1`s
    # (which means a contact duration of delta_slice times the number of consecutive ones)
    # or an entry higher than one which expresses (in units of delta_slice) the time during
    #which the tag underwent no contact
    

    #p.save("gaps.dat",gaps, fmt='%d')

    find_gap=s.where(gaps != 1)

    gap_distr=(gaps[find_gap]-1)*delta_slice  #so, this is really the list of the
    #time intervals between two contacts for my tag. After the discussion with Ciro,
    #I slightly modified the definition (there is now a -1 in it).
    #It probably does not matter much for the calculated distribution.

    #print "gap_distr is, ", gap_distr
    #NB: the procedure above does NOT break down if gap_distr is empty


    #Now I calculate the duration of the contacts of my tag. I changed this bit since
    #I had new discussions with Ciro

    #single_tag_no_rep=s.hstack((0,single_tag_no_rep))

    #print "single_tag_no_rep is, ", single_tag_no_rep, "and its length is, ", len(single_tag_no_rep)
    
    e2=s.diff(time_list)

    #print "e2 is, ", e2
    
    sel=s.where(e2!=1)[0]
    #print "sel is, ", sel

    res=0 #this will contain the results and will be overwritten

    #What is here needs to be tested very carefully! There may be some bugs


    sol=s.hstack((0,sel,len(e2)))
    #print "sol is, ", sol

        
    
    res=s.diff(sol)
    #print "res initially is, ", res


    res[0]=res[0]+1 #boundary correction: the diff above undercounts the first
    #run of contacts by one slice

    #print "res is, ", res
        
        


    res=res*delta_slice


    #print "the sum of all the durations is, ", res.sum()

    return res,gap_distr 




def contact_duration_and_interval_single_tag(single_tag_no_rep, delta_slice):

    #the following if condition is useful only when I am really tracking a particular
    #tag whose ID is given a priori but which may not exist at all (in the sense that
    #it would not establish any contact) in the time window during which I am studying
    #the system.


    if (single_tag_no_rep is None):
        print "The chosen tag does not exist hence no analysis can be performed on it"
        return



    delta_slice=int(delta_slice) #I do not need floating point arithmetic

    single_tag_no_rep=(single_tag_no_rep-single_tag_no_rep[0])/delta_slice
    gaps=s.diff(single_tag_no_rep) #a bit more efficient than an explicit shifted-slice subtraction

    #print "gaps is, ", gaps

    #gaps is now an array of integers. It either has a list of consecutive 1`s
    # (which means a contact duration of delta_slice times the number of consecutive ones)
    # or an entry higher than one which expresses (in units of delta_slice) the time during
    #which the tag underwent no contact
    

    #p.save("gaps.dat",gaps, fmt='%d')

    find_gap=s.where(gaps != 1)

    gap_distr=(gaps[find_gap]-1)*delta_slice  #so, this is really the list of the
    #time intervals between two contacts for my tag. After the discussion with Ciro,
    #I slightly modified the definition (there is now a -1 in it).
    #It probably does not matter much for the calculated distribution.

    #print "gap_distr is, ", gap_distr
    #NB: the procedure above does NOT break down if gap_distr is empty


    #Now I calculate the duration of the contacts of my tag. I changed this bit since
    #I had new discussions with Ciro

    #single_tag_no_rep=s.hstack((0,single_tag_no_rep))

    #print "single_tag_no_rep is, ", single_tag_no_rep, "and its length is, ", len(single_tag_no_rep)
    
    e2=s.diff(single_tag_no_rep)

    #print "e2 is, ", e2
    
    sel=s.where(e2!=1)[0]
    #print "sel is, ", sel

    res=0 #this will contain the results and will be overwritten

    #What is here needs to be tested very carefully! There may be some bugs


    sol=s.hstack((0,sel,len(e2)))
    #print "sol is, ", sol

        
    
    res=s.diff(sol)
    #print "res initially is, ", res


    res[0]=res[0]+1 #boundary correction: the diff above undercounts the first
    #run of contacts by one slice

    #print "res is, ", res
        
        


    res=res*delta_slice


    #print "the sum of all the durations is, ", res.sum()

    return res,gap_distr 
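
#Worked example (hand-traced sketch) with delta_slice=20 and rescaled contact
#slices [0, 1, 2, 5, 6]: s.diff gives [1, 1, 3, 1], so gap_distr = [(3-1)*20]
#= [40], sol = [0, 2, 4], res = [2, 2] -> [3, 2] after the boundary
#correction, and the durations are [60, 40]: a three-slice contact followed
#by a two-slice one.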

def contact_duration_and_interval_many_tags(sliced_interactions, delta_slice):


    #This function iterates contact_duration_and_interval_single_tag on all the tag IDs,
    #thus outputting the distributions of contact durations and of the time intervals
    #between any two contacts in the system.

    tag_ids= n.unique1d(s.ravel(sliced_interactions[:,1:3])) #to get a list of
    #all tag IDs, which appear (repeated) in two columns of the matrix output by 
    # time_binned_interaction
    
    tag_ids=tag_ids.astype('int')

    

    #print "tag_ids is, ", tag_ids

    overall_gaps=s.zeros(0) #this array will contain the time intervals between two consecutive
    #contacts for all the tags in the system.



    overall_duration=s.zeros(0) #this array will contain the time duration of the
    #contacts for all the tags in the system.



    for i in xrange(len(tag_ids)): 
        track_tag_id=tag_ids[i]  #i.e. iterate on all tags
        
        contact_times=single_tag_contact_times(sliced_interactions, track_tag_id) #get
        #an array with all the interactions of a given tag

        #print "contact_times is, ", contact_times

        results=contact_duration_and_interval_single_tag(contact_times, delta_slice)

        tag_duration=results[0]

        
        tag_intervals=results[1] #get
        #an array with the time intervals between two contacts for a given tag

        
        #print "tag_intervals is, ", tag_intervals
        
        overall_gaps=s.hstack((overall_gaps,tag_intervals)) #collect
        #the results on all tags
        
        
        #print "overall_gaps is, ", overall_gaps

        overall_duration=s.hstack((overall_duration,tag_duration))

    #overall_gaps=overall_gaps[s.where(overall_gaps !=0)]
    #overall_duration=overall_duration[s.where(overall_duration !=0)]
    p.save("many_tags_contact_interval_distr2.dat", overall_gaps ,  fmt='%d')
    p.save("many_tags_contact_duration_distr2.dat", overall_duration ,  fmt='%d')

    return overall_duration, overall_gaps






def contact_duration_single_tag(single_tag_no_rep, delta_slice):

    delta_slice=int(delta_slice) #I do not need floating point arithmetic

    single_tag_no_rep=(single_tag_no_rep-single_tag_no_rep[0])/delta_slice
    
    #Now a series of array manipulations. If this function works, it should be merged
    #with interval_between_contacts_single_tag

    #append 0 in front of the array
    single_tag_no_rep=s.hstack((0,single_tag_no_rep))

    e2=s.diff(single_tag_no_rep)
    sel=s.where(e2!=1)[0]
    f=s.diff(sel)
    res=f[s.where(f!=1)]

    res=res-1 #since e.g. having a sequence (0,1,2) means 3 adjacent numbers but the overall
    #contact lasts TWO (not three) delta_slice`s

    res=res*delta_slice #to have the time intervals in the right units

    return res
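
#Hand-traced sketch for the same slices [0, 1, 2, 5, 6]: after prepending 0,
#e2 = [0, 1, 1, 3, 1], sel = [0, 3], f = [3], res = (3-1)*delta_slice. Note
#this earlier variant counts n adjacent slices as n-1 durations and, as
#written, does not report the final run of contacts.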


def contact_duration_many_tags(sliced_interactions, delta_slice):

    #this function is basically a slight modification of interval_between_contacts_many_tags.
    #See that function for documentation; this also explains the non-intuitive choice of the
    #names used for the arrays



    tag_ids= n.unique1d(s.ravel(sliced_interactions[:,1:3])) #to get a list of
    #all tag IDs, which appear (repeated) in two columns of the matrix output by 
    # time_binned_interaction
    
    tag_ids=tag_ids.astype('int')

    

    #print "tag_ids is, ", tag_ids

    overall_gaps=s.zeros(0) #this array will contain the time intervals between two consecutive
    #contacts for all the tags in the system.

    for i in xrange(len(tag_ids)): 
        track_tag_id=tag_ids[i]  #i.e. iterate on all tags
        
        contact_times=single_tag_contact_times(sliced_interactions, track_tag_id) #get
        #an array with all the interactions of a given tag

        #print "contact_times is, ", contact_times
        
        tag_intervals=contact_duration_single_tag(contact_times, delta_slice) #get
        #an array with the time intervals between two contacts for a given tag

        #print "tag_intervals is, ", tag_intervals
        
        overall_gaps=s.hstack((overall_gaps,tag_intervals)) #collect
        #the results on all tags

        #print "overall_gaps is, ", overall_gaps
        
    p.save("many_tags_contact_duration_distr.dat", (overall_gaps) ,  fmt='%d')

        
    return overall_gaps









def interval_between_contacts_single_tag(single_tag_no_rep, delta_slice):

    delta_slice=int(delta_slice) #I do not need floating point arithmetic

    single_tag_no_rep=(single_tag_no_rep-single_tag_no_rep[0])/delta_slice


    #The previous line serves the purpose of rescaling time in units of the
    #chosen time interval delta_slice and to reset the time origin to the initial
    #recorded time. This is simply convenient and has no effect on the statistics
    #of the contact duration distribution and on the distribution of the time
    #interval between consecutive contacts for a given tag
    

    #gaps=single_tag_no_rep[1:]-single_tag_no_rep[:-1]

    gaps=s.diff(single_tag_no_rep) #a bit more efficient than the line above


    #gaps is now an array of integers. It either has a list of consecutive 1`s
    # (which means a contact duration of delta_slice times the number of consecutive ones)
    # or an entry higher than one which expresses (in units of delta_slice) the time during
    #which the tag underwent no contact
    

    #p.save("gaps.dat",gaps, fmt='%d')

    find_gap=s.where(gaps != 1)

    gap_distr=(gaps[find_gap]-1)*delta_slice  #so, this is really the list of the
    #time intervals between contacts (i.e. the time interval separating two contacts)
    #of the tag I am tracking

    #p.save("single_tag_gap_distr.dat", gap_distr*delta_slice, fmt='%d')

    return gap_distr


def interval_between_contacts_many_tags(sliced_interactions, delta_slice):


    #This function iterates interval_between_contacts_single_tag on all the tag IDs,
    #thus outputting the distribution of time intervals between any two contacts in the system.

    tag_ids= n.unique1d(s.ravel(sliced_interactions[:,1:3])) #to get a list of
    #all tag IDs, which appear (repeated) in two columns of the matrix output by 
    # time_binned_interaction
    
    tag_ids=tag_ids.astype('int')

    

    #print "tag_ids is, ", tag_ids

    overall_gaps=s.zeros(0) #this array will contain the time intervals between two consecutive
    #contacts for all the tags in the system.

    for i in xrange(len(tag_ids)): 
        track_tag_id=tag_ids[i]  #i.e. iterate on all tags
        
        contact_times=single_tag_contact_times(sliced_interactions, track_tag_id) #get
        #an array with all the contact times of a given tag

        #print "contact_times is, ", contact_times
        
        tag_intervals=interval_between_contacts_single_tag(contact_times, delta_slice) #get
        #an array with the time intervals between consecutive contacts of the given tag

        #print "tag_intervals is, ", tag_intervals
        
        overall_gaps=s.hstack((overall_gaps,tag_intervals)) #collect
        #the results on all tags

        #print "overall_gaps is, ", overall_gaps
        
    p.save("many_tags_contact_interval_distr.dat", (overall_gaps) ,  fmt='%d')

        
    return overall_gaps
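
#A hedged usage sketch of the two "many tags" drivers above (assuming, as in
#the commented-out tests at the bottom of this script, that my_graph_list
#comes from output_graph_improved):
#
# sliced_interactions=time_binned_interaction(my_graph_list, delta_slice)
# contact_duration_many_tags(sliced_interactions[1], delta_slice) #[1]: no self-loops
# interval_between_contacts_many_tags(sliced_interactions[1], delta_slice)
#
#Each call writes its distribution to a .dat file and also returns the array,
#so the results can be post-processed in memory as well.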







# def iter_process_packet_reduced_improved( start, delta, dumpfile):
    
#     P = pcap.pcapObject()

#     P.open_offline(dumpfile)


#     temp_arr=s.zeros(6)-1 #this array will be used to store the results of the computations
#     #and it is also the first row (temporarily) of the array (it will be removed later on
#     #in this function)
    
#     temp_arr=temp_arr.astype('int')

#     i=0  #initialize counter

#     while 1:

#         t=P.next() #So I start reading the info for each packet in increasing time
    
#         if (i>start and i<=start+delta): #condition about when to start and
#             #finish acquiring the data
#             data_array=process_packet_reduced(*t)

#             if (data_array!= None): #Important! Otherwise the code would break down
#                 #when a packet has been rejected and nothing is read into data_array
            
#                 if (data_array[0] != -1): #that means: the value I used to initialize
#                     #the array (in particular time) is -1. If that is still the value,
#                     #then the array has never been overwritten and no contact was detected.
#                     #Hence there is nothing worth saving
                
#                     temp_arr=s.vstack((temp_arr,data_array)) #careful! this way
#                     #temp_arr has an extra row! (the initialized value of temp_arr we spoke
#                     #of before)
            
#         i=i+1 #increase the counter

#         if (i>start+delta):

#             print "End of the game"

#             break
        
#     temp_arr=temp_arr[1:, :] #Now we remove the first row, which was simply a set of -1
#     #and was used to initialize the array
    
#     return temp_arr





# def process_packet(pktlen, data, timestamp):
#     #print time.ctime(timestamp), pktlen, len(data)
#     payload = data[44:44+16]
#     decrypted_data = xxtea.decode(payload)
#     station_id = struct.unpack('!L', data[28:28+4])[0]
#     tstamp = int(timestamp)
#     proto = struct.unpack("!B", decrypted_data[0])[0]

#     if proto == PROTO_CONTACTREPORT:
#         (proto, id, flags, id1, id2, id3, id4, seq, crc) = struct.unpack("!BHBHHHHHH", decrypted_data)
#         if crc != xxtea.crc16(decrypted_data[:14]):
#             print 'rejecting packet from 0x%08x on CRC' % station_id
#             return

#         contact = Contact(tstamp, station_id, id, seq, [id1, id2, id3, id4], flags)

#     elif proto == PROTO_BEACONTRACKER_NEW:
#         (proto, id, flags, strength, id_last_seen, boot_count, reserved, seq, crc) = struct.unpack("!BHBBHHBLH", decrypted_data)
#         if crc != xxtea.crc16(decrypted_data[:14]):
#             print 'rejecting packet from 0x%08x on CRC' % station_id
#             return
       
#         sighting = Sighting(tstamp, station_id, id, seq, strength, flags, last_seen=id_last_seen, boot_count=boot_count)
#         print sighting
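
#For reference, the 16-byte contact-report layout implied by the format string
#"!BHBHHHHHH" in process_packet above is (field sizes in bytes):
#  proto(1) id(2) flags(1) id1(2) id2(2) id3(2) id4(2) seq(2) crc(2)
#with the trailing crc checked against the first 14 bytes of the decrypted data.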



#The following function is used to keep track of the contacts. Function iter_process_packet_reduced
#returns a list of times and contacts between the tags. However, this is often redundant (i.e. the same
#contact between e.g. tags 1 and 20 may have been recorded simultaneously (same timestamp) by
#different detectors, even though the physical event is the same).

def order_count(A):

    d = {}
    for r in A:
        t = tuple(r)
        d[t] = d.get(t,0) + 1

    # The dict d now has the counts of the unique rows of A.

    B = n.array(d.keys())    # The unique rows of A
    C = n.array(d.values())  # The counts of the unique rows

    return B,C
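
#A minimal example of order_count (hypothetical data): for
# A = n.array([[123, 345], [123, 345], [200, 300]])
#it returns the unique rows [[123, 345], [200, 300]] together with their
#counts [2, 1] (the row order follows the dictionary and is thus arbitrary).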


def time_binned_interaction(tag_tag, delta_slice):

    #tag_tag is an array containing the tag-tag interactions,
    #already in binary form. It has a rather simple structure:
    #    timestamp   tag A   tag B
    #i.e. it has only three columns, and the id of tag A is always
    #lower than that of tag B (a convention for "sorting" the contacts)

    time_raw=tag_tag[:,0] #time as taken from the dump file
    #(first column of tag_tag [labelled with a zero])
    time_grid=s.arange(min(time_raw),(max(time_raw)+1.), delta_slice) #I define a
    #new time vector with a spacing given by delta_slice. I will look at snapshots
    #of the system taken every delta_slice time units (or better: whatever
    #interactions are recorded in a time span delta_slice will contribute to the
    #list of recorded interactions [i.e. adjacency list] in the time interval
    #[t, t+delta_slice]).

    #print "time_grid is, ", time_grid

    slice_lump=s.zeros(3)-1. #again, this is simply an initialization;
    #the array above will contain the views of the network recorded every
    #delta_slice (with the meaning expressed above).
    
    tag_tag=tag_tag[:,1:3] #now I have one array for time and one for the binary interactions
    #(tag_tag now has only the 2 columns with the data for the binary interactions)

    print "s.shape(tag_tag is), ", s.shape(tag_tag)
    
    for i in xrange(len(time_grid)-1):
        sel_slice=s.where((time_raw>=time_grid[i]) & (time_raw<time_grid[i+1])) #i.e. this
        #selects the binary interaction in a generic delta_slice time interval that scans
        #through all recorded times.
        
        my_slice=tag_tag[sel_slice, :] # binary contacts in the bins
        my_slice=my_slice[0]  #this is a bit tricky, but it seems the only way to
        #extract the binary interactions (from an array with TWO columns) in delta_slice

        #Now I have to be careful and introduce an extra if condition: it is possible that
        #for a given time slice, no contacts are detected. This case has to be spotted,
        #otherwise the routine would crash.

        if (s.shape(my_slice)[0]>0): #i.e. some contacts have been detected


            #print "my_slice is, ", my_slice
            #print "s.shape(my_slice) is, ", s.shape(my_slice)

            #Now I replace my_slice with an array where no row is ever repeated

            #This is the underlying philosophy: I read all the binary interactions taking
            #place in the system among all tags in a given delta_slice.
            #First of all, the interactions within each row are already sorted in
            #increasing order (i.e. a row can be 123 345 but NOT 345 123; this is done
            #by some other routine).

            #I could still have the same entry twice, e.g.

            #   123 345
            #   123 345

            #if the same two tags interact twice in the same delta_slice.
            #NB: the SAME tags, but TWO different interactions in the same time slice
            #(with two different associated timestamps [too far apart to be within the
            #experimental tolerance])

            #However, since I want a single "view" of the system in a given delta_slice,
            #I get rid of duplicate entries like the one above and simply say that there
            #has been a contact between tags 123 and 345 in delta_slice (hence they will
            #be adjacent in the network).

            #Using the tag_tag contact list, I fundamentally get a series of adjacency
            #lists as time progresses.



            my_slice=order_count(my_slice)[0] #this way I get the unique rows of tag_tag in
            # a given delta_slice


            my_time_slice=s.ones(len(my_slice))*time_grid[i+1] #I choose to label time
            #by the upper boundary of the [time_grid[i], time_grid[i+1]] time interval
            #(this is a convention which can be changed).

            slice_lump_temp=s.transpose(s.vstack((my_time_slice,s.transpose(my_slice))))
            slice_lump=s.vstack((slice_lump, slice_lump_temp))

            #the previous two lines are just a (complicated) way to pack together the list of
            #binary contacts (adjacency matrices) as time progresses


    #Now get rid of the initialization

    #print "slice_lump is, ", slice_lump
    
    slice_lump=slice_lump[1:len(slice_lump), :]

    #p.save("sliced_dynamics_self_loops.dat",slice_lump, fmt='%d') #to save everything \
    #as an array of integers

    #Now get rid of the self-loops
    sel=s.where(slice_lump[:,1]!=slice_lump[:,2])

    
    
    slice_lump_no_loops=slice_lump[sel,:][0]  #again, careful since this can be tricky!
    #print "slice_lump_new is, ", slice_lump
    #p.save("sliced_dynamics.dat",slice_lump, fmt='%d') 

    
    return [slice_lump, slice_lump_no_loops]
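
#A worked example of the binning above (hypothetical numbers): with
#delta_slice=20, the raw rows
# [100, 123, 345]
# [105, 123, 345]
# [130, 200, 300]
# [145, 200, 300]
#give the time grid [100, 120, 140] and collapse into
# [120, 123, 345]
# [140, 200, 300]
#(duplicate rows within a slice count once, each slice is labelled by the
#upper boundary of its [t, t+delta_slice) interval, and the last row at 145
#falls past the last complete bin, so it is discarded).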


def time_binned_interaction_new(tag_tag, delta_slice, delta_label, start):

    #tag_tag is an array containing the tag-tag interactions,
    #already in binary form. It has a rather simple structure:
    #    timestamp   tag A   tag B
    #i.e. it has only three columns, and the id of tag A is always
    #lower than that of tag B (a convention for "sorting" the contacts)

    
    time_grid=delta_label+start


    #Unlike in time_binned_interaction, time_grid is now a single time label
    #(not a vector): tag_tag is assumed to already contain the data of one
    #chunk, and all the interactions in it will be reported at this one time.

    #print "time_grid is, ", time_grid

    slice_lump=s.zeros(3)-1. #again, this is simply an initialization;
    #the array above will contain the view of the network for this chunk of data.
    
    tag_tag=tag_tag[:,1:3] #drop the timestamp column: tag_tag now has only the
    #2 columns with the data for the binary interactions

    #print "s.shape(tag_tag is), ", s.shape(tag_tag)
    
    #for i in xrange(len(time_grid)-1):

    #sel_slice=s.where((time_raw>=time_grid[i]) & (time_raw<time_grid[i+1])) #i.e. this

    #selects the binary interaction in a generic delta_slice time interval that scans
    #through all recorded times.

    #my_slice=tag_tag[sel_slice, :] # binary contacts in the bins
    #my_slice=my_slice[0]  #this is a bit tricky, but it looks the only ways to
    #extract the binary interactions (from an array with TWO columns) in delta_slice


    my_slice=tag_tag
    
    #Now I have to be careful and introduce an extra if condition: it is possible that
    #for a given time slice, no contacts are detected. This case has to be spotted,
    #otherwise the routine would crash.

    if (s.shape(my_slice)[0]>0): #i.e. some contacts have been detected


        #print "my_slice is, ", my_slice
        #print "s.shape(my_slice) is, ", s.shape(my_slice)

        #Now I replace my_slice with an array where no row is ever repeated.
        #See time_binned_interaction above for the full discussion of why duplicate
        #rows are collapsed into a single contact.



        my_slice=order_count(my_slice)[0] #this way I get the unique rows of tag_tag in
        # a given delta_slice


        my_time_slice=s.ones(len(my_slice))*time_grid #label every contact in
        #this chunk with the single time label time_grid defined above
        #(this is a convention which can be changed).

        slice_lump_temp=s.transpose(s.vstack((my_time_slice,s.transpose(my_slice))))
        slice_lump=s.vstack((slice_lump, slice_lump_temp))

        #the previous two lines are just a (complicated) way to pack together the list of
        #binary contacts (adjacency matrices) as time progresses


    #Now get rid of the initialization

    #print "slice_lump is, ", slice_lump
    
    slice_lump=slice_lump[1:len(slice_lump), :]

    #p.save("sliced_dynamics_self_loops.dat",slice_lump, fmt='%d') #to save everything \
    #as an array of integers

    #Now get rid of the self-loops
    sel=s.where(slice_lump[:,1]!=slice_lump[:,2])

    
    
    slice_lump_no_loops=slice_lump[sel,:][0]  #again, careful since this can be tricky!
    #print "slice_lump_new is, ", slice_lump
    #p.save("sliced_dynamics.dat",slice_lump, fmt='%d') 

    
    return [slice_lump, slice_lump_no_loops]




def time_binned_interaction_new_reduced(tag_tag, delta_slice, delta_label, start):

    #tag_tag is an array containing the tag-tag interactions,
    #already in binary form. It has a rather simple structure:
    #    timestamp   tag A   tag B
    #i.e. it has only three columns, and the id of tag A is always
    #lower than that of tag B (a convention for "sorting" the contacts)

    
    time_grid=delta_label+delta_slice   #+start


    #As in time_binned_interaction_new, time_grid is a single time label (not a
    #vector): tag_tag is assumed to already contain the data of one chunk, and
    #all the interactions in it will be reported at this one time.

    #print "time_grid is, ", time_grid

    slice_lump=s.zeros(3)-1. #again, this is simply an initialization;
    #the array above will contain the view of the network for this chunk of data.
    
    tag_tag=tag_tag[:,1:3] #drop the timestamp column: tag_tag now has only the
    #2 columns with the data for the binary interactions

    #print "s.shape(tag_tag is), ", s.shape(tag_tag)
    
    #for i in xrange(len(time_grid)-1):

    #sel_slice=s.where((time_raw>=time_grid[i]) & (time_raw<time_grid[i+1])) #i.e. this

    #selects the binary interaction in a generic delta_slice time interval that scans
    #through all recorded times.

    #my_slice=tag_tag[sel_slice, :] # binary contacts in the bins
    #my_slice=my_slice[0]  #this is a bit tricky, but it looks the only ways to
    #extract the binary interactions (from an array with TWO columns) in delta_slice


    my_slice=tag_tag
    
    #Now I have to be careful and introduce an extra if condition: it is possible that
    #for a given time slice, no contacts are detected. This case has to be spotted,
    #otherwise the routine would crash.

    if (s.shape(my_slice)[0]>0): #i.e. some contacts have been detected


        #print "my_slice is, ", my_slice
        #print "s.shape(my_slice) is, ", s.shape(my_slice)

        #Now I replace my_slice with an array where no row is ever repeated.
        #See time_binned_interaction above for the full discussion of why duplicate
        #rows are collapsed into a single contact.



        my_slice=order_count(my_slice)[0] #this way I get the unique rows of tag_tag in
        # a given delta_slice


        my_time_slice=s.ones(len(my_slice))*time_grid #label every contact in
        #this chunk with the single time label time_grid defined above
        #(this is a convention which can be changed).

        slice_lump_temp=s.transpose(s.vstack((my_time_slice,s.transpose(my_slice))))
        slice_lump=s.vstack((slice_lump, slice_lump_temp))

        #the previous two lines are just a (complicated) way to pack together the list of
        #binary contacts (adjacency matrices) as time progresses


    #Now get rid of the initialization

    #print "slice_lump is, ", slice_lump
    
    slice_lump=slice_lump[1:len(slice_lump), :]

    #p.save("sliced_dynamics_self_loops.dat",slice_lump, fmt='%d') #to save everything \
    #as an array of integers

    #Now get rid of the self-loops
    #sel=s.where(slice_lump[:,1]!=slice_lump[:,2])

    
    
    #slice_lump_no_loops=slice_lump[sel,:][0]  #again, careful since this can be tricky!
    #print "slice_lump_new is, ", slice_lump
    #p.save("sliced_dynamics.dat",slice_lump, fmt='%d') 

    
    return slice_lump
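
#For reference, the three time_binned_interaction variants above differ as
#follows: the original bins the raw contact list over a full time grid;
#time_binned_interaction_new assumes tag_tag already holds a single chunk of
#data and labels all of it with the single time delta_label+start;
#time_binned_interaction_new_reduced labels the chunk with
#delta_label+delta_slice and, unlike the other two, keeps the self-loops and
#returns a single array instead of a [with, without self-loops] pair.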







class OpenBeaconSystem:
    max_time = -1

    def __init__(self):
        self.Contact = []
        self.ContactHashSet = set()

    def process_contact(self, c):
        self.max_time = c.t

        # discard multiple contact instances reported by different stations
        # (within a 1-second timestamp tolerance)
        h = c.get_hash()
        if h in self.ContactHashSet:
            return

        self.ContactHashSet.add(h)
        self.Contact.append(c)

        mylog.write('%s\n' % c)

    def cleanup_contacts(self):
        self.max_time = int(time.time())
        self.ContactFresh = filter(lambda c: (self.max_time - c.t) < DELTA, self.Contact)
        self.ContactHashSet = set( [c.get_hash() for c in self.ContactFresh] )
        self.Contact = []

    def get_contacts(self, C):
        print ' #contacts:', len(C)

        data = pickle.dumps(
            [ (c.t, c.ip, c.id, c.seq, c.flags, c.seen_id, c.seen_pwr, c.seen_cnt) for c in C]
            ) + '\0'
        return data
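
#NB: the class above is carried over from the original server code and relies
#on names defined there (the Contact class, mylog, DELTA and the pickle
#module), none of which are defined in this script. A hedged sketch of the
#intended use (incoming_contacts is a hypothetical stream of Contact objects):
#
# obs=OpenBeaconSystem()
# for c in incoming_contacts:
#     obs.process_contact(c)   #duplicates (same get_hash()) are dropped
# obs.cleanup_contacts()       #keep only contacts younger than DELTA seconds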

# ------------------------------------------------------------------------------------


start=1243839974 # 1240068738   
delta=3600*24 #i.e. I read 24 hours of data

delta_t=5

time_dep =0 #whether I want to get a dynamic list of the interactions or "freeze"
#everything into a single file

delta_slice=20

dumpfile="log_1243839973.pcap"

track_tag_id=1101 #id of the tag whose interactions I want to track



iter_local(start, delta, dumpfile, delta_t, delta_slice, time_dep)




# test=iter_process_packet_reduced_improved_new( start, delta,dumpfile, delta_t )

# p.save("detection_arr.dat",test, fmt='%d') #to save everything as an array of integers

# my_graph_list=output_graph_improved(test,time_dep)

# p.save("tag-tag-binary-contacts.dat",my_graph_list, fmt='%d' )

# sliced_interactions=time_binned_interaction(my_graph_list, delta_slice)

# p.save("sliced_dynamics_self_loops.dat",sliced_interactions[0], fmt='%d')

# p.save("sliced_dynamics.dat",sliced_interactions[1], fmt='%d')



# ##################################################################################
# ##################################################################################
# ##################################################################################
# ##################################################################################

# #Now some tests to be conducted on a single tag
# #which I commented since I verified that the code passes them.


# #single_tag_dyn=single_tag_contact_times(sliced_interactions, track_tag_id)



# # interval_single=interval_between_contacts_single_tag(single_tag_dyn, delta_slice)

# # p.save("interval_single.dat",interval_single, fmt='%d')


# # duration_single=contact_duration_single_tag(single_tag_dyn, delta_slice)

# # p.save("duration_single.dat",duration_single, fmt='%d')


# #duration_single2=contact_duration_and_interval_single_tag(single_tag_dyn, delta_slice)[0]

# #p.save("duration_single2.dat",duration_single2, fmt='%d')


# #interval_single2=contact_duration_and_interval_single_tag(single_tag_dyn, delta_slice)[1]

# #p.save("interval_single2.dat",interval_single2, fmt='%d')


# # p.save("detection_arr.dat",test, fmt='%d')

# #End of the tests

# ##################################################################################
# ##################################################################################
# ##################################################################################
# ##################################################################################



# # interval_between_contacts_many_tags(sliced_interactions, delta_slice)

# # contact_duration_many_tags(sliced_interactions, delta_slice)

# #the following function now replaces the previous two (tested that I get the same results)

# #I modified the calls to these functions since now sliced_interactions is a list

# contact_duration_and_interval_many_tags(sliced_interactions[0], delta_slice)

# hash_and_time=couple_hash_table(sliced_interactions[0])


# detect_binary_contacts_among_same_two_tags(hash_and_time, delta_slice)




print "So far so good"