📄 evaluatechunk.cpp
字号:
} else { if (seq[i + 1][seq[i + 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 1] == i_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else { return 0; }}// counting number of chunks (IOB1)int count_chunks_iob1(int human_model, sequence & seq, string b_tag, string i_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (human_model == 1 && is_start_of_chunk_iob1(1, i, seq, b_tag, i_tag)) { count++; } if (human_model == 2 && is_start_of_chunk_iob1(2, i, seq, b_tag, i_tag)) { count++; } } return count;}// is matching chunk (IOB1)? int is_matching_chunk_iob1(int i, sequence & seq, string b_tag, string i_tag) { if (!is_start_of_chunk_iob1(1, i, seq, b_tag, i_tag) || !is_start_of_chunk_iob1(2, i, seq, b_tag, i_tag)) { return 0; } int len = seq.size(); int j = i, k = i; while (j < len) { if (is_end_of_chunk_iob1(1, j, seq, b_tag, i_tag)) { break; } else { j++; } } while (k < len) { if (is_end_of_chunk_iob1(2, k, seq, b_tag, i_tag)) { break; } else { k++; } } return (j == k);}// counting matching chunks (IOB1)int count_matching_chunks_iob1(sequence & seq, string b_tag, string i_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (is_start_of_chunk_iob1(1, i, seq, b_tag, i_tag)) { if (is_matching_chunk_iob1(i, seq, b_tag, i_tag)) { count++; } } } return count;}//==================================================================double chunk_evaluate_ioe2(dataset & data, chunkset & chunks) { vector<int> human_chk_count; vector<int> model_chk_count; vector<int> match_chk_count; int i; int num_chunks = chunks.size(); for (i = 0; i < num_chunks; i++) { human_chk_count.push_back(0); model_chk_count.push_back(0); match_chk_count.push_back(0); } dataset::iterator datait; for (datait = data.begin(); datait != data.end(); datait++) { for (i = 0; i < num_chunks; i++) { human_chk_count[i] += count_chunks_ioe2(1, *datait, chunks[i][0], chunks[i][1]); model_chk_count[i] += count_chunks_ioe2(2, *datait, chunks[i][0], chunks[i][1]); match_chk_count[i] += count_matching_chunks_ioe2(*datait, chunks[i][0], chunks[i][1]); } } printf("\tChunk-based performance evaluation:\n\n"); printf("\t\tChunk\tManual\tModel\tMatch\tPre.(%)\tRec.(%)\tF1-Measure(%)\n"); printf("\t\t-----\t------\t-----\t-----\t-------\t-------\t-------------\n"); int count = 0; double pre = 0.0, rec = 0.0, f1 = 0.0; double total1_pre = 0.0, total1_rec = 0.0, total1_f1 = 0.0; double total2_pre = 0.0, total2_rec = 0.0, total2_f1 = 0.0; int total_human = 0, total_model = 0, total_match = 0; for (i = 0; i < num_chunks; i++) { if (model_chk_count[i] > 0) { pre = (double)match_chk_count[i] / model_chk_count[i]; total_model += model_chk_count[i]; total1_pre += pre; } else { pre = 0.0; } if (human_chk_count[i] > 0) { rec = (double)match_chk_count[i] / human_chk_count[i]; total_human += human_chk_count[i]; total1_rec += rec; count++; } else { rec = 0.0; } total_match += match_chk_count[i]; if (pre + rec > 0) { f1 = (double) 2 * pre * rec / (pre + rec); } else { f1 = 0.0; } printf("\t\t%s\t%d\t%d\t%d\t%6.2f\t%6.2f\t%6.2f\n", chunks[i][2].c_str(), human_chk_count[i], model_chk_count[i], match_chk_count[i], pre * 100, rec * 100, f1 * 100); } printf("\t\t-----\t------\t-----\t-----\t-------\t-------\t-------------\n"); if (count > 0) { total1_pre /= count; total1_rec /= count; if (total1_pre + total1_rec > 0) { total1_f1 = 2 * total1_pre * total1_rec / (total1_pre + total1_rec); } printf("\t\tAvg1.\t\t\t\t%6.2f\t%6.2f\t%6.2f\n", total1_pre * 100, total1_rec * 100, total1_f1 * 100); } if (total_model > 0) { total2_pre = (double)total_match / total_model; } if (total_human > 0) { total2_rec = (double)total_match / total_human; } if (total2_pre + total2_rec > 0) { total2_f1 = 2 * total2_rec * total2_pre / (total2_rec + total2_pre); } printf("\t\tAvg2.\t%d\t%d\t%d\t%6.2f\t%6.2f\t%6.2f\n\n", total_human, total_model, total_match, total2_pre * 100, total2_rec * 100, total2_f1 * 100); return total2_f1 * 100;}// is start of a chunk (IOE2)?int is_start_of_chunk_ioe2(int human_model, int i, sequence & seq, string i_tag, string e_tag) { if (human_model == 1) { if (seq[i][seq[i].size() - 2] == e_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 2] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 2] == i_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 2] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else if (human_model == 2) { if (seq[i][seq[i].size() - 1] == e_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 1] == i_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else { return 0; }}// is end of a chunk (IOE2)?int is_end_of_chunk_ioe2(int human_model, int i, sequence & seq, string i_tag, string e_tag) { if (human_model == 1) { return (seq[i][seq[i].size() - 2] == e_tag); } else if (human_model == 2) { return (seq[i][seq[i].size() - 1] == e_tag); } else { return 0; }}// counting number of chunks (IOE2)int count_chunks_ioe2(int human_model, sequence & seq, string i_tag, string e_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (human_model == 1 && is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag)) { count++; } if (human_model == 2 && is_start_of_chunk_ioe2(2, i, seq, i_tag, e_tag)) { count++; } } return count;}// is matching chunk (IOE2)? int is_matching_chunk_ioe2(int i, sequence & seq, string i_tag, string e_tag) { if (!is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag) || !is_start_of_chunk_ioe2(2, i, seq, i_tag, e_tag)) { return 0; } int len = seq.size(); int j = i, k = i; while (j < len) { if (is_end_of_chunk_ioe2(1, j, seq, i_tag, e_tag)) { break; } else { j++; } } while (k < len) { if (is_end_of_chunk_ioe2(2, k, seq, i_tag, e_tag)) { break; } else { k++; } } return (j == k);}// counting matching chunks (IOE2)int count_matching_chunks_ioe2(sequence & seq, string i_tag, string e_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (is_start_of_chunk_ioe2(1, i, seq, i_tag, e_tag)) { if (is_matching_chunk_ioe2(i, seq, i_tag, e_tag)) { count++; } } } return count;}//======================================================================double chunk_evaluate_ioe1(dataset & data, chunkset & chunks) { vector<int> human_chk_count; vector<int> model_chk_count; vector<int> match_chk_count; int i; int num_chunks = chunks.size(); for (i = 0; i < num_chunks; i++) { human_chk_count.push_back(0); model_chk_count.push_back(0); match_chk_count.push_back(0); } dataset::iterator datait; for (datait = data.begin(); datait != data.end(); datait++) { for (i = 0; i < num_chunks; i++) { human_chk_count[i] += count_chunks_ioe1(1, *datait, chunks[i][0], chunks[i][1]); model_chk_count[i] += count_chunks_ioe1(2, *datait, chunks[i][0], chunks[i][1]); match_chk_count[i] += count_matching_chunks_ioe1(*datait, chunks[i][0], chunks[i][1]); } } printf("\tChunk-based performance evaluation:\n\n"); printf("\t\tChunk\tManual\tModel\tMatch\tPre.(%)\tRec.(%)\tF1-Measure(%)\n"); printf("\t\t-----\t------\t-----\t-----\t-------\t-------\t-------------\n"); int count = 0; double pre = 0.0, rec = 0.0, f1 = 0.0; double total1_pre = 0.0, total1_rec = 0.0, total1_f1 = 0.0; double total2_pre = 0.0, total2_rec = 0.0, total2_f1 = 0.0; int total_human = 0, total_model = 0, total_match = 0; for (i = 0; i < num_chunks; i++) { if (model_chk_count[i] > 0) { pre = (double)match_chk_count[i] / model_chk_count[i]; total_model += model_chk_count[i]; total1_pre += pre; } else { pre = 0.0; } if (human_chk_count[i] > 0) { rec = (double)match_chk_count[i] / human_chk_count[i]; total_human += human_chk_count[i]; total1_rec += rec; count++; } else { rec = 0.0; } total_match += match_chk_count[i]; if (pre + rec > 0) { f1 = (double) 2 * pre * rec / (pre + rec); } else { f1 = 0.0; } printf("\t\t%s\t%d\t%d\t%d\t%6.2f\t%6.2f\t%6.2f\n", chunks[i][2].c_str(), human_chk_count[i], model_chk_count[i], match_chk_count[i], pre * 100, rec * 100, f1 * 100); } printf("\t\t-----\t------\t-----\t-----\t-------\t-------\t-------------\n"); if (count > 0) { total1_pre /= count; total1_rec /= count; if (total1_pre + total1_rec > 0) { total1_f1 = 2 * total1_pre * total1_rec / (total1_pre + total1_rec); } printf("\t\tAvg1.\t\t\t\t%6.2f\t%6.2f\t%6.2f\n", total1_pre * 100, total1_rec * 100, total1_f1 * 100); } if (total_model > 0) { total2_pre = (double)total_match / total_model; } if (total_human > 0) { total2_rec = (double)total_match / total_human; } if (total2_pre + total2_rec > 0) { total2_f1 = 2 * total2_rec * total2_pre / (total2_rec + total2_pre); } printf("\t\tAvg2.\t%d\t%d\t%d\t%6.2f\t%6.2f\t%6.2f\n\n", total_human, total_model, total_match, total2_pre * 100, total2_rec * 100, total2_f1 * 100); return total2_f1 * 100;}// is start of a chunk (IOE1)?int is_start_of_chunk_ioe1(int human_model, int i, sequence & seq, string i_tag, string e_tag) { if (human_model == 1) { if (seq[i][seq[i].size() - 2] == e_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 2] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 2] == i_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 2] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else if (human_model == 2) { if (seq[i][seq[i].size() - 1] == e_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else if (seq[i][seq[i].size() - 1] == i_tag) { if (i <= 0) { return 1; } else { if (seq[i - 1][seq[i - 1].size() - 1] != i_tag) { return 1; } else { return 0; } } } else { return 0; } } else { return 0; }}// is end of a chunk (IOE1)?int is_end_of_chunk_ioe1(int human_model, int i, sequence & seq, string i_tag, string e_tag) { if (human_model == 1) { if (seq[i][seq[i].size() - 2] == e_tag) { return 1; } else if (seq[i][seq[i].size() - 2] == i_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 2] == e_tag || seq[i + 1][seq[i + 1].size() - 2] == i_tag) { return 0; } else { return 1; } } } else { return 0; } } else if (human_model == 2) { if (seq[i][seq[i].size() - 1] == e_tag) { return 1; } else if (seq[i][seq[i].size() - 1] == i_tag) { if (i >= seq.size() - 1) { return 1; } else { if (seq[i + 1][seq[i + 1].size() - 1] == e_tag || seq[i + 1][seq[i + 1].size() - 1] == i_tag) { return 0; } else { return 1; } } } else { return 0; } } else { return 0; }}// counting number of chunks (IOE1)int count_chunks_ioe1(int human_model, sequence & seq, string i_tag, string e_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (human_model == 1 && is_start_of_chunk_ioe1(1, i, seq, i_tag, e_tag)) { count++; } if (human_model == 2 && is_start_of_chunk_ioe1(2, i, seq, i_tag, e_tag)) { count++; } } return count;}// is matching chunk (IOE1)? int is_matching_chunk_ioe1(int i, sequence & seq, string i_tag, string e_tag) { if (!is_start_of_chunk_ioe1(1, i, seq, i_tag, e_tag) || !is_start_of_chunk_ioe1(2, i, seq, i_tag, e_tag)) { return 0; } int len = seq.size(); int j = i, k = i; while (j < len) { if (is_end_of_chunk_ioe1(1, j, seq, i_tag, e_tag)) { break; } else { j++; } } while (k < len) { if (is_end_of_chunk_ioe1(2, k, seq, i_tag, e_tag)) { break; } else { k++; } } return (j == k);}// counting matching chunks (IOE1)int count_matching_chunks_ioe1(sequence & seq, string i_tag, string e_tag) { int count = 0; for (int i = 0; i < seq.size(); i++) { if (is_start_of_chunk_ioe1(1, i, seq, i_tag, e_tag)) { if (is_matching_chunk_ioe1(i, seq, i_tag, e_tag)) { count++; } } } return count;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -