- 論壇徽章:
- 0
|
<div><span class="Apple-style-span" style="line-height: 18px; font-size: 11.6667px; color: rgb(102, 102, 102); font-family: Arial; -webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px; "><p style="line-height: normal; ">mapreduce程序設計</p><p style="line-height: normal; ">import java.io.IOException;<br style="line-height: normal; ">import org.apache.hadoop.conf.Configuration;<br style="line-height: normal; ">import org.apache.hadoop.conf.Configured;<br style="line-height: normal; ">import org.apache.hadoop.fs.Path;<br style="line-height: normal; ">import org.apache.hadoop.io.LongWritable;<br style="line-height: normal; ">import org.apache.hadoop.io.Text;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.Job;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.Mapper;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.Reducer;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;<br style="line-height: normal; ">import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;</p><p style="line-height: normal; ">import org.apache.hadoop.util.Tool; </p><p style="line-height: normal; ">/*org.apache.hadoop.mapreduce.lib. 
取代org.apache.hadoop.mapred.xxx,這里的改變讓程序員修改代碼時會更加的方便,比原來能夠少寫很多代碼</p><p style="line-height: normal; ">the old API++++++++++++++++++++++++++++++++++++++++++++++++++</p><p style="line-height: normal; ">public static class MapClass extends MapReduceBase<br style="line-height: normal; ">implements Mapper<K1, V1, K2, V2> {<br style="line-height: normal; ">public void map(K1 key, V1 value,<br style="line-height: normal; ">OutputCollector<K2, V2> output,<br style="line-height: normal; ">Reporter reporter) throws IOException { }<br style="line-height: normal; ">}<br style="line-height: normal; ">public static class Reduce extends MapReduceBase<br style="line-height: normal; ">implements Reducer<K2, V2, K3, V3> {<br style="line-height: normal; ">public void reduce(K2 key, Iterator<V2> values,<br style="line-height: normal; ">OutputCollector<K3, V3> output,<br style="line-height: normal; ">Reporter reporter) throws IOException { }<br style="line-height: normal; ">}</p><p style="line-height: normal; ">The new API ++++++++++++++++++++++++++++++++++++++++++++++++<br style="line-height: normal; ">public static class MapClass extends Mapper<K1, V1, K2, V2> {<br style="line-height: normal; ">public void map(K1 key, V1 value, Context context)<br style="line-height: normal; ">throws IOException, InterruptedException { }<br style="line-height: normal; ">}</p><p style="line-height: normal; ">public static class Reduce extends Reducer<K2, V2, K3, V3> {<br style="line-height: normal; ">public void reduce(K2 key, Iterable<V2> values, Context context)<br style="line-height: normal; ">throws IOException, InterruptedException { }<br style="line-height: normal; ">}<br style="line-height: normal; "></p><p style="line-height: normal; ">*/<br style="line-height: normal; ">import org.apache.hadoop.util.ToolRunner;<br style="line-height: normal; "><br style="line-height: normal; ">public class tt extends Configured implements Tool {<br style="line-height: normal; ">public static class MapClass<br 
style="line-height: normal; ">extends Mapper<LongWritable, Text, Text, Text> {<br style="line-height: normal; ">public void map(LongWritable key, Text value, Context context)<br style="line-height: normal; ">throws IOException, InterruptedException {<br style="line-height: normal; ">String[] citation = value.toString().split(",");//split的作用是將該字符串里面的變量賦值給citation這個字符串數組當中。<br style="line-height: normal; ">context.write(new Text(citation[1]), new Text(citation[0])); //使用新的API取代了collect相關的API,將map中的key和value進行了互換。<br style="line-height: normal; ">}<br style="line-height: normal; ">}<br style="line-height: normal; ">public static class Reduce extends Reducer<Text, Text, Text, Text> { //前兩個參數設置是輸入參數,后兩個參數是輸出參數。<br style="line-height: normal; ">public void reduce(Text key, Iterable<Text> values,<br style="line-height: normal; ">Context context)<br style="line-height: normal; ">throws IOException, InterruptedException {<br style="line-height: normal; ">String csv ="";<br style="line-height: normal; "><br style="line-height: normal; ">for (Text val:values) {//Text類型是類似于String類型的文本格式,但是在處理編碼上還是和String有差別,與內存序列化有關,是hadoop經過封裝之后的新類。<br style="line-height: normal; ">if (csv.length() > 0) csv += ",";<br style="line-height: normal; ">csv += val.toString();<br style="line-height: normal; ">}<br style="line-height: normal; "><br style="line-height: normal; ">context.write(key, new Text(csv));<br style="line-height: normal; ">}<br style="line-height: normal; ">}<br style="line-height: normal; ">public int run(String[] args) throws Exception { //由hadoop本身調用該程序<br style="line-height: normal; ">Configuration conf = getConf();<br style="line-height: normal; ">Job job = new Job(conf, "tt"); //利用job取代了jobclient<br style="line-height: normal; ">job.setJarByClass(tt.class);<br style="line-height: normal; ">Path in = new Path(args[0]);<br style="line-height: normal; ">Path out = new Path(args[1]);<br style="line-height: normal; 
">FileInputFormat.setInputPaths(job, in);<br style="line-height: normal; ">FileOutputFormat.setOutputPath(job, out);<br style="line-height: normal; "><span style="line-height: normal; color: rgb(255, 0, 0); ">job.setMapperClass(MapClass.class);</span><br style="line-height: normal; "><span style="line-height: normal; color: rgb(255, 0, 0); ">job.setReducerClass(Reduce.class);</span><br style="line-height: normal; "><span style="line-height: normal; color: rgb(255, 0, 0); ">job.setInputFormatClass(TextInputFormat.class);</span><br style="line-height: normal; "><span style="line-height: normal; color: rgb(255, 0, 0); ">job.setOutputFormatClass(TextOutputFormat.class);</span><br style="line-height: normal; "><span style="line-height: normal; color: rgb(255, 0, 0); ">job.setOutputKeyClass(Text.class);</span><br style="line-height: normal; "><span style="line-height: normal; color: rgb(255, 0, 0); ">job.setOutputValueClass(Text.class); </span> //此處如果不進行設置,系統會拋出異常,還要記住新舊API不能混用<br style="line-height: normal; ">System.exit(job.waitForCompletion(true)?0:1);<br style="line-height: normal; ">return 0;<br style="line-height: normal; ">}<br style="line-height: normal; ">public static void main(String[] args) throws Exception {<br style="line-height: normal; ">int res = ToolRunner.run(new Configuration(), new tt(), args); //調用新的類的方法免除配置的相關瑣碎的細節<br style="line-height: normal; ">System.exit(res);<br style="line-height: normal; ">}<br style="line-height: normal; ">}</p><p style="line-height: normal; ">上面的代碼在eclipse中是可以運行的,但是輸入文件是hadoop in action中的文件cite75_99.txt,</p><p style="line-height: normal; ">格式如下:</p><p style="line-height: normal; ">[root@asus input]# head -n 5 cite75_99.txt <br style="line-height: normal; ">"CITING","CITED"<br style="line-height: normal; ">3858241,956203<br style="line-height: normal; ">3858241,1324234<br style="line-height: normal; ">3858241,3398406<br style="line-height: normal; ">3858241,3557384</p><p style="line-height: 
normal; ">我寫的這個例子開始就是這樣報錯<span style="line-height: normal; "><span style="line-height: normal; ">org</span>.<span style="line-height: normal; ">apache</span>.<span style="line-height: normal; ">hadoop</span>.<span style="line-height: normal; ">io</span>.<span style="line-height: normal; ">LongWritable</span> <span style="line-height: normal; ">cannot</span> </span><br style="line-height: normal; "><span style="line-height: normal; "><span style="line-height: normal; ">be</span> <span style="line-height: normal; ">cast</span> <span style="line-height: normal; ">to</span> <span style="line-height: normal; ">org</span>.<span style="line-height: normal; ">apache</span>.<span style="line-height: normal; ">hadoop</span>.<span style="line-height: normal; ">io</span>.<span style="line-height: normal; ">Text</span> 然后按照上面的程序修改調用了新的API 就能夠有效的將key的類型設置成Text,我用紅顏色標記的部分是必須要這樣寫的 因為設置Text必須要在map reduce 和conf中同時設置才管用。我的郵箱是shenyanxxxy@qq.com 如果有hadoop的興趣愛好者可以聯系我 我們共同來商討。</span></p></span></div><div><br></div> |
|