1 0 yongsun /* 2 82 yongsun * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 82 yongsun * 4 82 yongsun * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. 5 82 yongsun * 6 82 yongsun * The contents of this file are subject to the terms of either the GNU Lesser 7 82 yongsun * General Public License Version 2.1 only ("LGPL") or the Common Development and 8 82 yongsun * Distribution License ("CDDL")(collectively, the "License"). You may not use this 9 82 yongsun * file except in compliance with the License. You can obtain a copy of the CDDL at 10 82 yongsun * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at 11 82 yongsun * http://www.opensource.org/licenses/lgpl-license.php. See the License for the 12 82 yongsun * specific language governing permissions and limitations under the License. When 13 82 yongsun * distributing the software, include this License Header Notice in each file and 14 82 yongsun * include the full text of the License in the License file as well as the 15 82 yongsun * following notice: 16 82 yongsun * 17 82 yongsun * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 18 82 yongsun * (CDDL) 19 82 yongsun * For Covered Software in this distribution, this License shall be governed by the 20 82 yongsun * laws of the State of California (excluding conflict-of-law provisions). 21 82 yongsun * Any litigation relating to this License shall be subject to the jurisdiction of 22 82 yongsun * the Federal Courts of the Northern District of California and the state courts 23 82 yongsun * of the State of California, with venue lying in Santa Clara County, California. 24 82 yongsun * 25 82 yongsun * Contributor(s): 26 82 yongsun * 27 82 yongsun * If you wish your version of this file to be governed by only the CDDL or only 28 82 yongsun * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to 29 82 yongsun * include this software in this distribution under the [CDDL or LGPL Version 2.1] 30 82 yongsun * license." If you don't indicate a single choice of license, a recipient has the 31 82 yongsun * option to distribute your version of this file under either the CDDL or the LGPL 32 82 yongsun * Version 2.1, or to extend the choice of license to its licensees as provided 33 82 yongsun * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL 34 82 yongsun * Version 2 license, then the option applies only if the new code is made subject 35 82 yongsun * to such option by the copyright holder. 36 0 yongsun */ 37 82 yongsun 38 0 yongsun #ifndef _SIM_IDNGRAM_MERGE_H 39 0 yongsun #define _SIM_IDNGRAM_MERGE_H 40 0 yongsun 41 0 yongsun #include <stdio.h> 42 0 yongsun #include <map> 43 0 yongsun #include <vector> 44 0 yongsun #include <algorithm> 45 0 yongsun 46 0 yongsun #include "../sim_fmerge.h" 47 0 yongsun #include "idngram.h" 48 0 yongsun 49 0 yongsun template<int N> 50 0 yongsun void DoIdngramMerge(FILE*out, CMultiWayFileMerger<CSIM_IdngramFreq<N> > &merger) 51 0 yongsun { 52 0 yongsun merger.start(); 53 0 yongsun CSIM_IdngramFreq<N> prevItem; 54 0 yongsun while (true) { 55 0 yongsun file_para<CSIM_IdngramFreq<N> > * ppara = merger.getBest(); 56 0 yongsun TUnitAndParaInfo<CSIM_IdngramFreq<N> > & upi = *(*ppara); 57 0 yongsun if (upi.runOut) { 58 0 yongsun if (prevItem.freq != 0) { 59 0 yongsun fwrite(prevItem.ids, sizeof(TSIMWordId), N, out); 60 0 yongsun fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out); 61 0 yongsun } 62 0 yongsun break; 63 0 yongsun } 64 0 yongsun CSIM_IdngramFreq<N>& ng = upi.unit; 65 0 yongsun if (!(prevItem == ng)) { 66 0 yongsun if (prevItem.freq != 0) { 67 0 yongsun fwrite(prevItem.ids, sizeof(TSIMWordId), N, out); 68 0 yongsun fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out); 69 0 yongsun } 70 0 yongsun prevItem = ng; 71 0 yongsun } else { 72 0 yongsun prevItem.freq += ng.freq; 73 0 yongsun } 74 0 yongsun merger.next(); 75 0 yongsun } 76 0 yongsun } 77 0 yongsun 78 0 yongsun template<int N> 79 0 yongsun void ProcessingIdngramMerge(FILE *swap, FILE* out, std::vector<long>& para_offsets) 80 0 yongsun { 81 0 yongsun CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger; 82 0 yongsun long s = 0; 83 0 yongsun for (int i=0; i < para_offsets.size(); ++i) { 84 0 yongsun merger.addPara(swap, s, para_offsets[i]); 85 0 yongsun s = para_offsets[i]; 86 0 yongsun } 87 0 yongsun DoIdngramMerge<N>(out, merger); 88 0 yongsun } 89 0 yongsun 90 0 yongsun template<int N> 91 0 yongsun void ProcessingIdngramMerge(FILE* out, std::vector<FILE* >& file_list) 92 0 yongsun { 93 0 yongsun CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger; 94 0 yongsun for (int i=0; i < file_list.size(); ++i) { 95 0 yongsun fseek(file_list[i], 0, SEEK_END); 96 0 yongsun merger.addPara(file_list[i], 0, ftell(file_list[i])); 97 0 yongsun } 98 0 yongsun DoIdngramMerge<N>(out, merger); 99 0 yongsun } 100 0 yongsun 101 0 yongsun #endif 102 0 yongsun 103