📄 epc2d.c
字号:
for (i=0; i<signature_bit_list->no_signature; i++) { compare_result = compare_signature_bit_score(current_signature->next_signature, signature_bit); if (compare_result == 1) { new_signature->next_signature = current_signature->next_signature; current_signature->next_signature = new_signature; signature_bit_list->no_signature++; inserted=1; break; } current_signature = current_signature->next_signature; } if (inserted==0) { current_signature->next_signature = new_signature; new_signature->next_signature = NULL; signature_bit_list->no_signature++; }}/*------------------------------------------------------------------------ Function : refine_signature_bit_list Purpose : Just to discard signature_bit after no_left ----------------------------------------------------------------------*/ void refine_signature_bit_list (Signature_Bit_List *signature_bit_list, int no_left){ int i; Signature_Bitptr current_signature = signature_bit_list->next; Signature_Bitptr temp_signature; if (signature_bit_list->no_signature > no_left) { for (i=0; i<no_left; i++) current_signature = current_signature->next_signature; temp_signature = current_signature->next_signature; current_signature->next_signature = NULL; while (temp_signature != NULL) { current_signature = temp_signature->next_signature; free (temp_signature->bitmap); free (temp_signature); temp_signature = current_signature; } signature_bit_list->no_signature = no_left; }}/*------------------------------------------------------------------------ Function : diff_signature_bitmap Purpose : to count the number of different entry in two signature (bitmap) return : No. of different entry ----------------------------------------------------------------------*/ int diff_signature_bitmap (int *sig1, int *sig2, int nodimension){ int i, j; int result=0; int WordNo; int bit; WordNo = (nodimension-1)/32; for (i=0; i<=WordNo; i++) { bit = sig1[i] ^ sig2[i]; for (j=0; j<32; j++) { if (((bit>>j) & 1) == 1) result++; } } return result;}/*------------------------------------------------------------------------ Function : subset_signature_bitmap Purpose : to test if sig2 (bitmap) is a subset of sig1 (bitmap) to test if sig1 (bitmap) is a superset of sig2 (bitmap) Return : 1 if yes, 0 if no ----------------------------------------------------------------------*/ int subset_signature_bitmap (int *sig1, int *sig2, int nodimension){ int i, j; int WordNo; int result = 1; WordNo = (nodimension-1)/32; for (i=0; i<=WordNo; i++) { for (j=0; j<32; j++) { if (((sig2[i]>>j) & 1) == 1) { if (((sig1[i]>>j) & 1) != 1) { result = 0; break; } } } } return result;}/*------------------------------------------------------------------------ Function : remove_signature_bit Purpose : to insert a signature_bit into a signature_bit_list ----------------------------------------------------------------------*/ void remove_signature_bit (Signature_Bit_List *signature_bit_list, int position) { int i; Signature_Bitptr current_signature; Signature_Bitptr temp_signature; current_signature = signature_bit_list->next; for (i=0; i<position; i++) current_signature = current_signature->next_signature; temp_signature = current_signature->next_signature; current_signature->next_signature = temp_signature->next_signature; free (temp_signature->bitmap); free (temp_signature);} /*------------------------------------------------------------------------ Function : combine_signature_bit_list Purpose : to combine "similar signatures" ----------------------------------------------------------------------*/ void combine_signature_bit_list (FILE *fp, Signature_Bit_List *signature_bit_list, int nodimension){ Signature_Bitptr signature1, signature2; Signature_Bitptr temp_signature; Signature_Bitptr new_signature; int combined=1; int out_combined=1; int WordNo = (nodimension-1)/32; int i, j; ID_Planeptr current_plane; float similarity; float highest_similar; Signature_Bitptr remove_signature; Signature_Bitptr combined_signature; signature1 = signature_bit_list->next; /* repeat to scan the signature bit list until no combination in previous iteration */while (combined==1) { printf ("no signature is %d\n", signature_bit_list->no_signature); combined=0; signature1 = signature_bit_list->next; highest_similar = 0.0; while ((signature1!=NULL) && (signature1->next_signature != NULL)) { signature2 = signature1->next_signature; while ((signature2!=NULL) && (signature2->next_signature != NULL)) { /* only test on similarity if the two signature bits differ from equal or less then nodimension / 5 */ if (diff_signature_bitmap (signature1->next_signature->bitmap, signature2->next_signature->bitmap, nodimension) <= nodimension/5 ) { /* if the two signatures are not in conflict */ if (eq_signature_bit (signature1->next_signature, signature2->next_signature)) { /* generate new_signature and their similarity */ new_signature = (Signature_Bitptr) similar_signatures(signature1->next_signature, signature2->next_signature, &similarity); /* simlarity > 0.3 is a heristics */ if (similarity>0.3) { if (similarity > highest_similar) { highest_similar = similarity; printf ("%.3f highest similar\n", highest_similar); remove_signature = signature2; combined_signature = signature1; } } } } signature2 = signature2->next_signature; } signature1 = signature1->next_signature; } if (highest_similar >0.0) { print_signature_bit (fp, combined_signature->next_signature, nodimension); fprintf (fp, "can be combined with \n"); print_signature_bit (fp, remove_signature->next_signature, nodimension); fprintf (fp, "\n"); new_signature = (Signature_Bitptr) similar_signatures(combined_signature->next_signature, remove_signature->next_signature, &similarity); fprintf (fp, "to become \n"); print_signature_bit (fp, new_signature, nodimension); fprintf (fp, "similarity is %.3f\n", similarity); /* set combined flag to indicate combination happen in this iteration */ combined=1; /* remove signatre2->next_signature */ temp_signature = remove_signature->next_signature; remove_signature->next_signature = temp_signature->next_signature; fprintf (fp, "removing "); print_signature_bit (fp, temp_signature, nodimension); signature_bit_list->no_signature--; if (temp_signature->bitmap != NULL) free (temp_signature->bitmap); free_id_plane (temp_signature->next_plane); free (temp_signature); /* replace signature1->next_signature by new_signature */ temp_signature = combined_signature->next_signature; new_signature->next_signature = temp_signature->next_signature; combined_signature->next_signature = new_signature; free (temp_signature->bitmap); free_id_plane (temp_signature->next_plane); free (temp_signature); } }}void free_id_plane (ID_Planeptr the){ ID_Planeptr next = the->next; while (next != NULL) { free (the); the = next; next = the->next; } free (the);} Signature_Bitptr similar_signatures (Signature_Bit *sig1, Signature_Bit *sig2, float *similarity) { int common, uncommon1, uncommon2; int no_int; ID_Planeptr current_plane; int position, WordNo; Signature_Bitptr the; /* Allocate the the (Signature_Bit) structure */ the = (Signature_Bit *)malloc (sizeof (Signature_Bit)); the->score = 0; the->next_signature = NULL; the->no_id_plane = 0; the->next_plane = (ID_Planeptr) malloc (sizeof (ID_Plane)); the->next_plane->dimension1=-1; the->next_plane->dimension2=-1; the->next_plane->no_id = 0; the->next_plane->next = NULL; the->bitmap = (int *)malloc (sizeof (sig1->bitmap));
#ifndef WIN32 bzero (the->bitmap, sizeof (sig1->bitmap));#else
memset(the->bitmap, 0, sizeof(sig1->bitmap));
#endif common = common_id_plane (sig1, sig2, &uncommon1, &uncommon2, the->next_plane); the->no_id_plane = common;// the->no_id_plane += uncommon1;// the->no_id_plane += uncommon2; if (sig1->score >= sig2->score) the->no_id_plane += uncommon1; else if (sig2->score >= sig1->score) the->no_id_plane += uncommon2; *similarity = (float)common/(float)(common+uncommon1+uncommon2); current_plane = the->next_plane; while (current_plane->next != NULL) { position = current_plane->next->dimension1 %32; WordNo = current_plane->next->dimension1 / 32; the->bitmap[WordNo] |= (1<<position); position = current_plane->next->dimension2 %32; WordNo = current_plane->next->dimension2 / 32; the->bitmap[WordNo] |= (1<<position); current_plane = current_plane->next; } the->score = sig1->score + sig2->score; return (the);}int common_id_plane (Signature_Bit *sig1, Signature_Bit *sig2, int *uncommon1, int *uncommon2, ID_Planeptr the){ int no_common=0; ID_Planeptr plane1 = sig1->next_plane; ID_Planeptr plane2 = sig2->next_plane; *uncommon1=0; *uncommon2=0; while ((plane1->next != NULL) && (plane2->next != NULL)) { if (compare_id_plane (plane1->next, plane2->next) == 0) { no_common++; insert_id_plane (plane1->next->dimension1, plane1->next->dimension2, plane1->next->id[0], the); plane1 = plane1->next; plane2=plane2->next; } else if (compare_id_plane (plane1->next, plane2->next) == -1) { (*uncommon1)++; if (sig1->score >= sig2->score) { insert_id_plane (plane1->next->dimension1, plane1->next->dimension2, plane1->next->id[0], the); } plane1 = plane1->next; } else if (compare_id_plane (plane1->next, plane2->next) == 1) { (*uncommon2)++; if (sig1->score <= sig2->score) { insert_id_plane (plane2->next->dimension1, plane2->next->dimension2, plane2->next->id[0], the); } plane2 = plane2->next; } } while (plane1->next != NULL) { (*uncommon1)++; if (sig1->score >= sig2->score) { insert_id_plane (plane1->next->dimension1, plane1->next->dimension2, plane1->next->id[0], the); } plane1 = plane1->next; } while (plane2->next != NULL) { (*uncommon2)++; if (sig1->score <= sig2->score) { insert_id_plane (plane2->next->dimension1, plane2->next->dimension2, plane2->next->id[0], the); } plane2 = plane2->next; } return no_common;} /* return 1 if plane 1 < plane2, -1 if plane1 < plane2, 0 if plane1 == plane2 */int compare_id_plane (ID_Planeptr plane1, ID_Planeptr plane2){ if ((plane1->dimension1 == plane2->dimension1) && (plane1->dimension2 == plane2->dimension2)) return 0; if (plane1->dimension1 < plane2->dimension1) return -1; else if (plane1->dimension1 > plane2->dimension1) return 1; else if (plane1->dimension2 < plane2->dimension2) return -1; else if (plane1->dimension2 > plane2->dimension2) return 1;}void remove_id_plane (Signature_Bit *sig1, ID_Planeptr current) { ID_Planeptr next = current->next->next; free (current->next); current->next = next; sig1->no_id_plane--;} Signature_Bit *derive_signature_bit (const int *signature, int nodimension, int no_entry) { int i; int no_int; int dimension1, dimension2; Signature_Bit *the; int position, WordNo; ID_Planeptr id_planeptr, temp_id_planeptr; int no_related_dimension=0; /* Allocate the the (Signature_Bit) structure */ no_int = nodimension/32; if (no_int * 32 < nodimension) no_int++; the = (Signature_Bit *)malloc (sizeof (Signature_Bit)); the->score = 0; the->next_signature = NULL; the->no_id_plane = 0; the->next_plane = (ID_Planeptr) malloc (sizeof (ID_Plane)); the->next_plane->dimension1=-1; the->next_plane->dimension2=-1; the->next_plane->no_id = 0; the->next_plane->next = NULL; the->bitmap = (int *)malloc (sizeof (int) * no_int);
#ifndef WIN32 bzero (the->bitmap, sizeof (int) * no_int);#else
memset(the->bitmap, 0, sizeof(int)*no_int);
#endif for (i=0; i<no_entry; i++) { if (signature[i] != 0) { /* find the dimensions it represent for each non-zero entry, tranform to bitmap */ dense_id_2_dim (i, nodimension, &dimension1, &dimension2); position = dimension1 % 32; WordNo = dimension1 / 32; the->bitmap[WordNo] |= (1<<position); position = dimension2 % 32; WordNo = dimension2 / 32; the->bitmap[WordNo] |= (1<<position); if (insert_id_plane (dimension1, dimension2, signature[i], the->next_plane)==1) the->no_id_plane++; } } for (i=0; i<nodimension; i++) { position = i % 32; WordNo = i / 32; if (((the->bitmap[WordNo] >> position) & 1) == 1) no_related_dimension++; } if ((no_related_dimension != 0) && (no_related_dimension != 1)) the->score = (float) no_related_dimension * (float) the->no_id_plane / (float)combination (no_related_dimension, 2); return (the);} int eq_id_plane (ID_Planeptr target_planeptr, ID_Planeptr src_planeptr){ ID_Planeptr id_planeptr=target_planeptr; /* transverse the id_plane linked list until a suitable place to insert the new id_plane */ while ((id_planeptr->next != NULL) && (id_planeptr->next->dimension1<src_planeptr->dimension1)) id_planeptr = id_planeptr->next; while ((id_planeptr->next != NULL) && (id_planeptr->next->dimension2<src_planeptr->dimension2) && (id_planeptr->next->dimension1<=src_planeptr->dimension1)) id_planeptr = id_planeptr->next; /* return TRUE if target_planeptr list does not contain id in src_planeptr */ if (id_planeptr -> next == NULL) return 1; /* if there are same plane */ if ((id_planeptr-> next->dimension1 == src_planeptr->dimension1) && (id_planeptr->next->dimension2 == src_planeptr->dimension2)) { /* return TRUE if target_planeptr list contains src_plane and have the same id */ if (id_planeptr->next->id[0] == src_planeptr->id[0]) return 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -