1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. 5 * 6 * The contents of this file are subject to the terms of either the GNU Lesser 7 * General Public License Version 2.1 only ("LGPL") or the Common Development and 8 * Distribution License ("CDDL")(collectively, the "License"). You may not use this 9 * file except in compliance with the License. You can obtain a copy of the CDDL at 10 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at 11 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the 12 * specific language governing permissions and limitations under the License. When 13 * distributing the software, include this License Header Notice in each file and 14 * include the full text of the License in the License file as well as the 15 * following notice: 16 * 17 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 18 * (CDDL) 19 * For Covered Software in this distribution, this License shall be governed by the 20 * laws of the State of California (excluding conflict-of-law provisions). 21 * Any litigation relating to this License shall be subject to the jurisdiction of 22 * the Federal Courts of the Northern District of California and the state courts 23 * of the State of California, with venue lying in Santa Clara County, California. 24 * 25 * Contributor(s): 26 * 27 * If you wish your version of this file to be governed by only the CDDL or only 28 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to 29 * include this software in this distribution under the [CDDL or LGPL Version 2.1] 30 * license." If you don't indicate a single choice of license, a recipient has the 31 * option to distribute your version of this file under either the CDDL or the LGPL 32 * Version 2.1, or to extend the choice of license to its licensees as provided 33 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL 34 * Version 2 license, then the option applies only if the new code is made subject 35 * to such option by the copyright holder. 36 */ 37 38 #ifndef _SIM_IDNGRAM_MERGE_H 39 #define _SIM_IDNGRAM_MERGE_H 40 41 #include <stdio.h> 42 #include <map> 43 #include <vector> 44 #include <algorithm> 45 46 #include "../sim_fmerge.h" 47 #include "idngram.h" 48 49 template<int N> 50 void DoIdngramMerge(FILE*out, CMultiWayFileMerger<CSIM_IdngramFreq<N> > &merger) 51 { 52 merger.start(); 53 CSIM_IdngramFreq<N> prevItem; 54 while (true) { 55 file_para<CSIM_IdngramFreq<N> > * ppara = merger.getBest(); 56 TUnitAndParaInfo<CSIM_IdngramFreq<N> > & upi = *(*ppara); 57 if (upi.runOut) { 58 if (prevItem.freq != 0) { 59 fwrite(prevItem.ids, sizeof(TSIMWordId), N, out); 60 fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out); 61 } 62 break; 63 } 64 CSIM_IdngramFreq<N>& ng = upi.unit; 65 if (!(prevItem == ng)) { 66 if (prevItem.freq != 0) { 67 fwrite(prevItem.ids, sizeof(TSIMWordId), N, out); 68 fwrite(&(prevItem.freq), sizeof(unsigned int), 1, out); 69 } 70 prevItem = ng; 71 } else { 72 prevItem.freq += ng.freq; 73 } 74 merger.next(); 75 } 76 } 77 78 template<int N> 79 void ProcessingIdngramMerge(FILE *swap, FILE* out, std::vector<long>& para_offsets) 80 { 81 CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger; 82 long s = 0; 83 for (int i=0; i < para_offsets.size(); ++i) { 84 merger.addPara(swap, s, para_offsets[i]); 85 s = para_offsets[i]; 86 } 87 DoIdngramMerge<N>(out, merger); 88 } 89 90 template<int N> 91 void ProcessingIdngramMerge(FILE* out, std::vector<FILE* >& file_list) 92 { 93 CMultiWayFileMerger<CSIM_IdngramFreq<N> > merger; 94 for (int i=0; i < file_list.size(); ++i) { 95 fseek(file_list[i], 0, SEEK_END); 96 merger.addPara(file_list[i], 0, ftell(file_list[i])); 97 } 98 DoIdngramMerge<N>(out, merger); 99 } 100 101 #endif 102 103