⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wordcount.py

📁 hadoop:Nutch集群平台
💻 PY
字号:
## Copyright 2006 The Apache Software Foundation## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at##     http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.#from org.apache.hadoop.fs import Pathfrom org.apache.hadoop.io import *from org.apache.hadoop.mapred import *import sysimport getoptclass WordCountMap(Mapper, MapReduceBase):    one = IntWritable(1)    def map(self, key, value, output, reporter):        for w in value.toString().split():            output.collect(Text(w), self.one)class Summer(Reducer, MapReduceBase):    def reduce(self, key, values, output, reporter):        sum = 0        while values.hasNext():            sum += values.next().get()        output.collect(key, IntWritable(sum))def printUsage(code):    print "wordcount [-m <maps>] [-r <reduces>] <input> <output>"    sys.exit(code)def main(args):    conf = JobConf(WordCountMap);    conf.setJobName("wordcount");     conf.setOutputKeyClass(Text);    conf.setOutputValueClass(IntWritable);        conf.setMapperClass(WordCountMap);            conf.setCombinerClass(Summer);    conf.setReducerClass(Summer);    try:        flags, other_args = getopt.getopt(args[1:], "m:r:")    except getopt.GetoptError:        printUsage(1)    if len(other_args) != 2:        printUsage(1)        for f,v in flags:        if f == "-m":            conf.setNumMapTasks(int(v))        elif f == "-r":            conf.setNumReduceTasks(int(v))    conf.setInputPath(Path(other_args[0]))    conf.setOutputPath(Path(other_args[1]))    JobClient.runJob(conf);if __name__ == "__main__":    main(sys.argv)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -