📄 jsd0262.py
字号:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Jensen-Shannon Divergence
import codecs, re, math
# Open the three UTF-8 input texts for reading. Each handle is read and
# closed further down the script.
f1 = codecs.open(r"./T0262_utf8.txt", mode="r", encoding="utf8")
f2 = codecs.open(r"./T0265_utf8.txt", mode="r", encoding="utf8")
f3 = codecs.open(r"./T0265_utf8_continue.txt", mode="r", encoding="utf8")
# Optional extra output file, currently disabled:
#f4=codecs.open(r"./T0265_jsd2.txt", "w", "utf8")
# Build the 2-gram (bigram) frequency dictionary for T0262.
# NOTE(review): this copy of the script lost its indentation and had several
# statements fused onto one line; the nesting below is reconstructed --
# confirm against the original file.
try:
    ls = f1.read()
finally:
    # Release the handle even if reading/decoding raises.
    f1.close()
i = 0      # number of bigram occurrences counted so far
pre = ''   # previously accepted token; '' means "no predecessor yet"
bi = ''    # most recent pre + k concatenation
d1 = {}    # bigram -> occurrence count
# A token is either a whole bracketed group "[...]" or a single character.
pattern = re.compile(r"(\[[^]]*\]|.)")
sl = pattern.findall(ls)
for k in sl:
    # NOTE(review): without re.DOTALL "." never matches "\n", so findall()
    # yields no bare newline token and this reset cannot fire as written;
    # bigrams are therefore also formed across line breaks. Decide which
    # behaviour is wanted before changing the pattern.
    if k == '\n':
        pre = ''
        bi = ''
        continue
    # Only tokens comparing >= U+3400 take part; a bracketed group starts
    # with "[" and is therefore skipped by this comparison.
    if k >= u'\u3400':
        bi = pre + k
        if pre != '':
            i = i + 1
            d1[bi] = d1.get(bi, 0) + 1
        # NOTE(review): assumed to sit inside the filter, so that skipped
        # tokens do not become the predecessor -- confirm.
        pre = k
num1 = i
# dwf1 is a plain copy of the bigram counts.
wf = d1.items()
dwf1 = dict(wf)
# Build the 2-gram (bigram) frequency dictionary for T0265.
# NOTE(review): this copy of the script lost its indentation and had several
# statements fused onto one line; the nesting below is reconstructed --
# confirm against the original file.
try:
    ls = f2.read()
finally:
    # Release the handle even if reading/decoding raises.
    f2.close()
i = 0      # number of bigram occurrences counted so far
pre = ''   # previously accepted token; '' means "no predecessor yet"
bi = ''    # most recent counted bigram
d2 = {}    # bigram -> occurrence count
# A token is either a whole bracketed group "[...]" or a single character.
pattern = re.compile(r"(\[[^]]*\]|.)")
sl = pattern.findall(ls)
for k in sl:
    # NOTE(review): without re.DOTALL "." never matches "\n", so findall()
    # yields no bare newline token and this reset cannot fire as written;
    # bigrams are therefore also formed across line breaks. Decide which
    # behaviour is wanted before changing the pattern.
    if k == '\n':
        pre = ''
        bi = ''
        continue
    # Only tokens comparing >= U+3400 take part; a bracketed group starts
    # with "[" and is therefore skipped by this comparison.
    if k >= u'\u3400':
        if pre != '':
            i = i + 1
            bi = pre + k
            d2[bi] = d2.get(bi, 0) + 1
        # NOTE(review): assumed to sit inside the filter, so that skipped
        # tokens do not become the predecessor -- confirm.
        pre = k
num2 = i
# dwf2 is a plain copy of the bigram counts.
wf = d2.items()
dwf2 = dict(wf)
# Read the T0265 key-word text from f3.
try:
    ls2 = f3.read()
finally:
    # Release the handle even if reading/decoding raises.
    f3.close()
# Open the HTML output file for writing as UTF-8.
fw = codecs.open('T0265_T0262.html', 'w', 'utf8')
fw.write(u'<html>\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n</head>\n<body>\n<br><h2>T0265 vs T0262 JS Divrgence 娓
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -