📄 stats.py
字号:
data["num_spam_correct"] data["num_incorrect"] = data["num_trained_spam_fn"] + \ data["num_trained_ham_fp"] data["perc_correct"] = 100.0 * data["num_correct"] / \ data["num_seen"] data["perc_incorrect"] = 100.0 * data["num_incorrect"] / \ data["num_seen"] data["perc_fp"] = 100.0 * data["num_trained_ham_fp"] / \ data["num_seen"] data["perc_fn"] = 100.0 * data["num_trained_spam_fn"] / \ data["num_seen"] data["num_unsure_trained_ham"] = data["num_trained_ham"] - \ data["num_trained_ham_fp"] data["num_unsure_trained_spam"] = data["num_trained_spam"] - \ data["num_trained_spam_fn"] data["num_unsure_not_trained"] = data["num_unsure"] - \ data["num_unsure_trained_ham"] - \ data["num_unsure_trained_spam"] if data["num_unsure"]: data["perc_unsure_trained_ham"] = 100.0 * \ data["num_unsure_trained_ham"] / \ data["num_unsure"] data["perc_unsure_trained_spam"] = 100.0 * \ data["num_unsure_trained_spam"] / \ data["num_unsure"] data["perc_unsure_not_trained"] = 100.0 * \ data["num_unsure_not_trained"] / \ data["num_unsure"] data["total_ham"] = data["num_ham_correct"] + \ data["num_trained_ham"] data["total_spam"] = data["num_spam_correct"] + \ data["num_trained_spam"] if data["total_ham"]: data["perc_ham_incorrect"] = 100.0 * \ data["num_trained_ham_fp"] / \ data["total_ham"] data["perc_ham_unsure"] = 100.0 * \ data["num_unsure_trained_ham"] / \ data["total_ham"] data["perc_ham_incorrect_or_unsure"] = \ 100.0 * (data["num_trained_ham_fp"] + data["num_unsure_trained_ham"]) / \ data["total_ham"] if data["total_spam"]: data["perc_spam_correct"] = 100.0 * data["num_spam_correct"] / \ data["total_spam"] data["perc_spam_unsure"] = 100.0 * \ data["num_unsure_trained_spam"] / \ data["total_spam"] data["perc_spam_correct_or_unsure"] = \ 100.0 * (data["num_spam_correct"] + \ data["num_unsure_trained_spam"]) / \ data["total_spam"] fp_cost = self.options["TestDriver", "best_cutoff_fp_weight"] fn_cost = self.options["TestDriver", "best_cutoff_fn_weight"] unsure_cost = self.options["TestDriver", "best_cutoff_unsure_weight"] data["total_cost"] = data["num_trained_ham_fp"] * fp_cost + \ data["num_trained_spam_fn"] * fn_cost + \ data["num_unsure"] * unsure_cost # If there was no filtering done, what would the cost have been? # (Assuming that any spam in the inbox earns the cost of a fn) no_filter_cost = data["num_spam"] * fn_cost data["cost_savings"] = no_filter_cost - data["total_cost"] return data def _AddPercentStrings(self, data, dp): data["perc_ham_s"] = "%%(perc_ham).%df%%(perc)s" % (dp,) data["perc_spam_s"] = "%%(perc_spam).%df%%(perc)s" % (dp,) data["perc_unsure_s"] = "%%(perc_unsure).%df%%(perc)s" % (dp,) data["perc_correct_s"] = "%%(perc_correct).%df%%(perc)s" % (dp,) data["perc_incorrect_s"] = "%%(perc_incorrect).%df%%(perc)s" % (dp,) data["perc_fp_s"] = "%%(perc_fp).%df%%(perc)s" % (dp,) data["perc_fn_s"] = "%%(perc_fn).%df%%(perc)s" % (dp,) data["perc_spam_correct_s"] = "%%(perc_spam_correct).%df%%(perc)s" \ % (dp,) data["perc_spam_unsure_s"] = "%%(perc_spam_unsure).%df%%(perc)s" \ % (dp,) data["perc_spam_correct_or_unsure_s"] = \ "%%(perc_spam_correct_or_unsure).%df%%(perc)s" % (dp,) data["perc_ham_incorrect_s"] = "%%(perc_ham_incorrect).%df%%(perc)s" \ % (dp,) data["perc_ham_unsure_s"] = "%%(perc_ham_unsure).%df%%(perc)s" \ % (dp,) data["perc_ham_incorrect_or_unsure_s"] = \ "%%(perc_ham_incorrect_or_unsure).%df%%(perc)s" % (dp,) data["perc_unsure_trained_ham_s"] = \ "%%(perc_unsure_trained_ham).%df%%(perc)s" % (dp,) data["perc_unsure_trained_spam_s"] = "%%(perc_unsure_trained_spam).%df%%(perc)s" \ % (dp,) data["perc_unsure_not_trained_s"] = "%%(perc_unsure_not_trained).%df%%(perc)s" \ % (dp,) data["perc"] = "%" return data def GetStats(self, use_html=False, session_only=False, decimal_points=1): """Return a description of the statistics. If session_only is True, then only a description of the statistics since we were last reset. Otherwise, lifetime statistics (i.e. those including the ones loaded). Users probably care most about persistent statistics, so present those by default. If session-only stats are desired, then a special call to here can be made. The percentages will be accurate to the given number of decimal points. If use_html is True, then the returned data is marked up with appropriate HTML, otherwise it is plain text. """ chunks = [] push = chunks.append if session_only: data = {} data["num_seen"] = self.num_ham + self.num_spam + \ self.num_unsure data["num_ham"] = self.num_ham data["num_spam"] = self.num_spam data["num_unsure"] = self.num_unsure data["num_trained_ham"] = self.num_trained_ham data["num_trained_ham_fp"] = self.num_trained_ham_fp data["num_trained_spam"] = self.num_trained_spam data["num_trained_spam_fn"] = self.num_trained_spam_fn else: data = self._CombineSessionAndTotal() push(_("Messages classified: %d") % (data["num_seen"],)) if data["num_seen"] == 0: return chunks data = self._CalculateAdditional(data) format_dict = self._AddPercentStrings(data, decimal_points) # Possibly use HTML for tabs. if use_html: format_dict["tab"] = " " else: format_dict["tab"] = "\t" push((_("%(tab)sGood:%(tab)s%(num_ham)d (%(perc_ham_s)s)") \ % format_dict) % format_dict) push((_("%(tab)sSpam:%(tab)s%(num_spam)d (%(perc_spam_s)s)") \ % format_dict) % format_dict) push((_("%(tab)sUnsure:%(tab)s%(num_unsure)d (%(perc_unsure_s)s)") \ % format_dict) % format_dict) push("") push((_("Classified correctly:%(tab)s%(num_correct)d (%(perc_correct_s)s of total)") \ % format_dict) % format_dict) push((_("Classified incorrectly:%(tab)s%(num_incorrect)d (%(perc_incorrect_s)s of total)") \ % format_dict) % format_dict) if format_dict["num_incorrect"]: push((_("%(tab)sFalse positives:%(tab)s%(num_trained_ham_fp)d (%(perc_fp_s)s of total)") \ % format_dict) % format_dict) push((_("%(tab)sFalse negatives:%(tab)s%(num_trained_spam_fn)d (%(perc_fn_s)s of total)") \ % format_dict) % format_dict) push("") push(_("Manually classified as good:%(tab)s%(num_trained_ham)d") % format_dict) push(_("Manually classified as spam:%(tab)s%(num_trained_spam)d") % format_dict) push("") if format_dict["num_unsure"]: push((_("Unsures trained as good:%(tab)s%(num_unsure_trained_ham)d (%(perc_unsure_trained_ham_s)s of unsures)") \ % format_dict) % format_dict) push((_("Unsures trained as spam:%(tab)s%(num_unsure_trained_spam)d (%(perc_unsure_trained_spam_s)s of unsures)") \ % format_dict) % format_dict) push((_("Unsures not trained:%(tab)s%(tab)s%(num_unsure_not_trained)d (%(perc_unsure_not_trained_s)s of unsures)") \ % format_dict) % format_dict) push("") if format_dict["total_spam"]: push((_("Spam correctly identified:%(tab)s%(perc_spam_correct_s)s (+ %(perc_spam_unsure_s)s unsure)") \ % format_dict) % format_dict) if format_dict["total_ham"]: push((_("Good incorrectly identified:%(tab)s%(perc_ham_incorrect_s)s (+ %(perc_ham_unsure_s)s unsure)") \ % format_dict) % format_dict) if format_dict["total_spam"] or format_dict["total_ham"]: push("") push(_("Total cost of spam:%(tab)s$%(total_cost).2f") % format_dict) push(_("SpamBayes savings:%(tab)s$%(cost_savings).2f") % format_dict) return chunksif __name__=='__main__': s = Stats() print "\n".join(s.GetStats())
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -