1 0 yongsun /* 2 82 yongsun * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 82 yongsun * 4 82 yongsun * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. 5 82 yongsun * 6 82 yongsun * The contents of this file are subject to the terms of either the GNU Lesser 7 82 yongsun * General Public License Version 2.1 only ("LGPL") or the Common Development and 8 82 yongsun * Distribution License ("CDDL")(collectively, the "License"). You may not use this 9 82 yongsun * file except in compliance with the License. You can obtain a copy of the CDDL at 10 82 yongsun * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at 11 82 yongsun * http://www.opensource.org/licenses/lgpl-license.php. See the License for the 12 82 yongsun * specific language governing permissions and limitations under the License. When 13 82 yongsun * distributing the software, include this License Header Notice in each file and 14 82 yongsun * include the full text of the License in the License file as well as the 15 82 yongsun * following notice: 16 82 yongsun * 17 82 yongsun * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 18 82 yongsun * (CDDL) 19 82 yongsun * For Covered Software in this distribution, this License shall be governed by the 20 82 yongsun * laws of the State of California (excluding conflict-of-law provisions). 21 82 yongsun * Any litigation relating to this License shall be subject to the jurisdiction of 22 82 yongsun * the Federal Courts of the Northern District of California and the state courts 23 82 yongsun * of the State of California, with venue lying in Santa Clara County, California. 24 82 yongsun * 25 82 yongsun * Contributor(s): 26 82 yongsun * 27 82 yongsun * If you wish your version of this file to be governed by only the CDDL or only 28 82 yongsun * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to 29 82 yongsun * include this software in this distribution under the [CDDL or LGPL Version 2.1] 30 82 yongsun * license." If you don't indicate a single choice of license, a recipient has the 31 82 yongsun * option to distribute your version of this file under either the CDDL or the LGPL 32 82 yongsun * Version 2.1, or to extend the choice of license to its licensees as provided 33 82 yongsun * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL 34 82 yongsun * Version 2 license, then the option applies only if the new code is made subject 35 82 yongsun * to such option by the copyright holder. 36 0 yongsun */ 37 82 yongsun 38 0 yongsun #ifdef HAVE_CONFIG_H 39 0 yongsun #include "config.h" 40 0 yongsun #endif 41 0 yongsun 42 0 yongsun #ifdef HAVE_ASSERT_H 43 0 yongsun #include <assert.h> 44 0 yongsun #endif 45 0 yongsun 46 0 yongsun #include <stdio.h> 47 0 yongsun #include <unistd.h> 48 208 tchaikov #include <stdlib.h> 49 0 yongsun 50 0 yongsun #include <vector> 51 0 yongsun #include <map> 52 0 yongsun #include <math.h> 53 0 yongsun 54 0 yongsun #include "../sim_slm.h" 55 0 yongsun #include "../slm.h" 56 0 yongsun 57 0 yongsun #include "ValueCompress.h" 58 0 yongsun 59 0 yongsun class CSIMSlmWithIteration : public CSIMSlm{ 60 0 yongsun public: 61 0 yongsun struct TLevelIterator { 62 0 yongsun std::vector<int> m_history; 63 0 yongsun }; 64 0 yongsun 65 0 yongsun public: 66 0 yongsun int 67 0 yongsun getLevelSize(int lvl) { return sz[lvl]; } 68 0 yongsun 69 0 yongsun int 70 0 yongsun getN() { return N; } 71 0 yongsun 72 0 yongsun void 73 0 yongsun getIdString(TLevelIterator& it, std::vector<TSIMWordId>& history); 74 0 yongsun 75 0 yongsun void 76 0 yongsun beginLevelIteration(int lvl, TLevelIterator& it); 77 0 yongsun 78 0 yongsun void 79 0 yongsun next(TLevelIterator& it); 80 0 yongsun 81 0 yongsun bool 82 0 yongsun isEnd(TLevelIterator& it); 83 0 yongsun 84 0 yongsun TLeaf* 85 0 yongsun getNodePtr(TLevelIterator& it); 86 0 yongsun 87 0 yongsun int 88 0 yongsun findState(int n, TSIMWordId*hw); 89 0 yongsun 90 0 yongsun void 91 0 yongsun findBackOffState(int n, TSIMWordId*hw, unsigned & bol, unsigned& bon); 92 0 yongsun 93 0 yongsun 94 0 yongsun protected: 95 0 yongsun void 96 0 yongsun adjustIterator(TLevelIterator& it); 97 0 yongsun }; 98 0 yongsun 99 0 yongsun int 100 0 yongsun CSIMSlmWithIteration::findState(int n, TSIMWordId*hw) 101 0 yongsun { 102 0 yongsun if (n == 0) return 0; 103 0 yongsun 104 0 yongsun int m = -1; 105 0 yongsun while (n > N) { --n; ++hw; } 106 0 yongsun 107 0 yongsun void* pstate = ((TNode*)level[0]); 108 0 yongsun for (int lvl=0; lvl < n && pstate != NULL; ++lvl) { 109 0 yongsun int h = ((TNode*)pstate)->child; 110 0 yongsun int t = (((TNode*)pstate)+1)->child; 111 0 yongsun if (lvl == N-1) { 112 0 yongsun TLeaf* p = (TLeaf*)level[lvl+1]; 113 0 yongsun pstate = (void*)binary_find_id(p+h, p+t, hw[lvl]); 114 0 yongsun m = (pstate != NULL)?(((TLeaf*)pstate) - p):(-1); 115 0 yongsun } else { 116 0 yongsun TNode* p = (TNode*)level[lvl+1]; 117 0 yongsun pstate = (void*)binary_find_id(p+h, p+t, hw[lvl]); 118 0 yongsun m = (pstate != NULL)?(((TNode*)pstate) - p):(-1); 119 0 yongsun } 120 0 yongsun } 121 0 yongsun return m; 122 0 yongsun } 123 0 yongsun 124 0 yongsun void 125 0 yongsun CSIMSlmWithIteration::findBackOffState(int n, TSIMWordId*hw, unsigned & bol, unsigned& bon) 126 0 yongsun { 127 0 yongsun while (n > 1) { 128 0 yongsun --n; ++hw; 129 0 yongsun int idx = findState(n, hw); 130 0 yongsun if (idx >= 0 && ((TNode*)(level[n]))[idx].child < ((TNode*)(level[n]))[idx+1].child) { 131 0 yongsun bol = n; bon = idx; return; 132 0 yongsun } 133 0 yongsun } 134 0 yongsun bol = bon = 0; 135 0 yongsun return; 136 0 yongsun } 137 0 yongsun 138 0 yongsun void 139 0 yongsun CSIMSlmWithIteration::getIdString(TLevelIterator& it, std::vector<TSIMWordId>& history) 140 0 yongsun { 141 0 yongsun history.clear(); 142 0 yongsun for (int i=1, tmp_sz=it.m_history.size(); i < tmp_sz; ++i) { 143 0 yongsun int idx = it.m_history[i]; 144 0 yongsun if (i == N) 145 0 yongsun history.push_back(((TLeaf*)(level[i]))[idx].id); 146 0 yongsun else 147 0 yongsun history.push_back(((TNode*)(level[i]))[idx].id); 148 0 yongsun } 149 0 yongsun } 150 0 yongsun 151 0 yongsun void 152 0 yongsun CSIMSlmWithIteration::beginLevelIteration(int lvl, TLevelIterator& it) 153 0 yongsun { 154 0 yongsun it.m_history.clear(); 155 0 yongsun for (int i=0, tmp_sz=lvl; i <= tmp_sz; ++i) 156 0 yongsun it.m_history.push_back(0); 157 0 yongsun adjustIterator(it); 158 0 yongsun } 159 0 yongsun 160 0 yongsun void 161 0 yongsun CSIMSlmWithIteration::next(TLevelIterator& it) 162 0 yongsun { 163 0 yongsun ++(it.m_history.back()); 164 0 yongsun adjustIterator(it); 165 0 yongsun } 166 0 yongsun 167 0 yongsun bool 168 0 yongsun CSIMSlmWithIteration::isEnd(TLevelIterator& it) 169 0 yongsun { 170 0 yongsun return ((it.m_history.back()+1 >= sz[it.m_history.size()-1])); 171 0 yongsun } 172 0 yongsun 173 0 yongsun void 174 0 yongsun CSIMSlmWithIteration::adjustIterator(TLevelIterator& it) 175 0 yongsun { 176 0 yongsun int ch = it.m_history.back(); 177 0 yongsun for (int i= it.m_history.size()-2; i >= 0; --i) { 178 0 yongsun int len = sz[i]; 179 0 yongsun int& parent = it.m_history[i]; 180 0 yongsun TNode* pn = (TNode*)(level[i]); 181 0 yongsun while (parent < len && pn[parent+1].child <= ch) 182 0 yongsun ++parent; 183 0 yongsun ch = parent; 184 0 yongsun } 185 0 yongsun } 186 0 yongsun 187 0 yongsun CSIMSlm::TLeaf* 188 0 yongsun CSIMSlmWithIteration::getNodePtr(TLevelIterator& it) 189 0 yongsun { 190 0 yongsun int lvl = it.m_history.size()-1; 191 0 yongsun int idx = it.m_history.back(); 192 0 yongsun if (lvl == N) 193 0 yongsun return (((TLeaf*)(level[lvl]))+idx); 194 0 yongsun else 195 0 yongsun return (((TNode*)(level[lvl]))+idx); 196 0 yongsun } 197 0 yongsun 198 0 yongsun 199 0 yongsun 200 0 yongsun void ShowUsage() 201 0 yongsun { 202 0 yongsun printf("Usage:\n"); 203 0 yongsun printf(" slmthread primitive_slm threaded_slm\n"); 204 0 yongsun printf("\nDescription:\n"); 205 0 yongsun printf(" slmthread add back-off-state for each slm node in the primitive_slm. "); 206 0 yongsun printf("Also it compresses 32-bit float into 16 bit representation.\n\n"); 207 0 yongsun exit(100); 208 0 yongsun } 209 0 yongsun 210 0 yongsun FILE* fp = NULL; 211 0 yongsun CThreadSlm::TNode* levels[16]; 212 0 yongsun CThreadSlm::TLeaf* lastLevel; 213 0 yongsun 214 0 yongsun int main(int argc, char* argv[]) 215 0 yongsun { 216 0 yongsun CValueCompressor vc; 217 0 yongsun unsigned int bol, bon; 218 0 yongsun CSIMSlmWithIteration slm; 219 0 yongsun std::vector<TSIMWordId> history; 220 0 yongsun float real_pr, eff_pr, real_bow, eff_bow; 221 0 yongsun 222 0 yongsun std::map<float, float> pr_eff, bow_eff; // effval --> val 223 0 yongsun std::map<float, int> pr_values, bow_values; // effval --> freq 224 0 yongsun std::map<float, int> pr_map, bow_map; // result: val --> int 225 0 yongsun std::vector<float> pr_table, bow_table; // result: val vector 226 0 yongsun std::vector<float>::iterator itt, itte; 227 0 yongsun 228 0 yongsun if (argc != 3) 229 0 yongsun ShowUsage(); 230 0 yongsun 231 0 yongsun printf("Loading original slm..."); fflush(stdout); 232 0 yongsun if (slm.Load(argv[1]) == false) 233 0 yongsun ShowUsage(); 234 0 yongsun 235 0 yongsun bool usingLogPr = slm.isUseLogPr(); 236 0 yongsun 237 0 yongsun #define EffectivePr(a) (float((usingLogPr)?((a)/log(2.0)):(-log2((a))))) 238 0 yongsun #define OriginalPr(b) (float((usingLogPr)?((b)*log(2.0)):(exp2(-(b))))) 239 0 yongsun #define EffectiveBow(a) (float((usingLogPr)?(exp(-(a))):((a)))) 240 0 yongsun #define OriginalBow(b) (float((usingLogPr)?(-log((b))):((b)))) 241 0 yongsun 242 0 yongsun printf("\nfirst pass..."); fflush(stdout); 243 0 yongsun for (int lvl=0; lvl <= slm.getN(); ++lvl) { 244 0 yongsun CSIMSlmWithIteration::TLevelIterator it; 245 0 yongsun slm.beginLevelIteration(lvl, it); 246 0 yongsun for (; !slm.isEnd(it); slm.next(it)) { 247 0 yongsun CSIMSlm::TLeaf* pl = slm.getNodePtr(it); 248 0 yongsun real_pr = pl->pr; 249 0 yongsun eff_pr = EffectivePr(real_pr); 250 0 yongsun if (pr_eff.find(eff_pr) == pr_eff.end()) { 251 0 yongsun pr_eff[eff_pr] = real_pr; 252 0 yongsun } else { // precision error cause non 1:1 mapping 253 0 yongsun pr_eff[eff_pr] = OriginalPr(eff_pr); 254 0 yongsun } 255 0 yongsun ++(pr_values[eff_pr]); 256 0 yongsun if (lvl < slm.getN()) { 257 0 yongsun real_bow = ((CSIMSlm::TNode*)pl)->bow; 258 0 yongsun eff_bow = EffectiveBow(real_bow); 259 0 yongsun if (bow_eff.find(eff_bow) == bow_eff.end()) { 260 0 yongsun bow_eff[eff_bow] = real_bow; 261 0 yongsun } else { // two values map to same distance value due to precision error 262 0 yongsun bow_eff[eff_bow] = OriginalBow(eff_bow); 263 0 yongsun } 264 0 yongsun ++(bow_values[eff_bow]); 265 0 yongsun } 266 0 yongsun } 267 0 yongsun } 268 0 yongsun 269 0 yongsun // Following pr value should not be grouped, or as milestone values. 270 0 yongsun static float msprs[] = { 271 0 yongsun 0.9, 0.8, 0.7, 0.6, 272 0 yongsun 1.0/2, 1.0/4, 1.0/8, 1.0/16, 1.0/32, 1.0/64, 1.0/128, 273 0 yongsun 1.0/256, 1.0/512, 1.0/1024, 1.0/2048, 1.0/4096, 1.0/8192, 274 0 yongsun 1.0/16384, 1.0/32768, 1.0/65536 275 0 yongsun }; 276 0 yongsun 277 0 yongsun for (unsigned i=0, sz=sizeof(msprs)/sizeof(float); i < sz; ++i) { 278 0 yongsun float real_pr = (usingLogPr)?(-log(msprs[i])):(msprs[i]); 279 0 yongsun float eff_pr = EffectivePr(real_pr); 280 0 yongsun if (pr_eff.find(eff_pr) == pr_eff.end()) { 281 0 yongsun pr_eff[eff_pr] = real_pr; 282 0 yongsun } else { // precision error cause non 1:1 mapping 283 0 yongsun pr_eff[eff_pr] = OriginalPr(eff_pr); 284 0 yongsun } 285 0 yongsun pr_values[eff_pr] = 0; 286 0 yongsun } 287 0 yongsun 288 0 yongsun // Following bow value should not be grouped, or as milestone values. 289 0 yongsun static float msbows[] = { 290 0 yongsun 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 291 0 yongsun 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001, 292 0 yongsun 0.00005, 0.00001, 0.000005, 0.000001, 0.0000005, 0.0000001 293 0 yongsun }; 294 0 yongsun 295 0 yongsun for (unsigned i=0, sz=sizeof(msbows)/sizeof(float); i < sz; ++i) { 296 0 yongsun float real_bow = (usingLogPr)?(-log(msbows[i])):(msbows[i]); 297 0 yongsun float eff_bow = EffectiveBow(real_bow); 298 0 yongsun if (bow_eff.find(eff_bow) == bow_eff.end()) { 299 0 yongsun bow_eff[eff_bow] = real_bow; 300 0 yongsun } else { // two values map to same distance value due to precision error 301 0 yongsun bow_eff[eff_bow] = OriginalBow(eff_bow); 302 0 yongsun } 303 0 yongsun bow_values[eff_bow] = 0; 304 0 yongsun } 305 0 yongsun 306 0 yongsun printf("\nCompressing pr values..."); fflush(stdout); 307 0 yongsun vc(pr_eff, pr_values, pr_map, pr_table, (1 << CThreadSlm::BITS_PR)); 308 0 yongsun pr_values.clear(); 309 0 yongsun itte = pr_table.end(); 310 0 yongsun for (itt = pr_table.begin(); itt != itte; ++itt) { 311 0 yongsun *itt = OriginalPr(*itt); 312 0 yongsun assert(usingLogPr || (*itt > 0.0 && *itt < 1.0)); 313 0 yongsun assert(!usingLogPr || *itt > 0.0); 314 0 yongsun } 315 0 yongsun printf("%d float values ==> %d values", pr_eff.size(), pr_table.size()); 316 0 yongsun 317 0 yongsun printf("\nCompressing bow values..."); fflush(stdout); 318 0 yongsun vc(bow_eff, bow_values, bow_map, bow_table, (1 << CThreadSlm::BITS_BOW)); 319 0 yongsun bow_values.clear(); 320 0 yongsun itte = bow_table.end(); 321 0 yongsun for (itt = bow_table.begin(); itt != itte; ++itt) 322 0 yongsun *itt = OriginalBow(*itt); 323 0 yongsun printf("%d float values ==> %d values", bow_eff.size(), bow_table.size()); 324 0 yongsun 325 0 yongsun 326 0 yongsun printf("\nThreading the new model..."); fflush(stdout); 327 0 yongsun for (int lvl=0; lvl < slm.getN(); ++lvl) { 328 0 yongsun levels[lvl] = new CThreadSlm::TNode[slm.getLevelSize(lvl)]; 329 0 yongsun 330 0 yongsun CSIMSlmWithIteration::TLevelIterator it; 331 0 yongsun slm.beginLevelIteration(lvl, it); 332 0 yongsun for (; !slm.isEnd(it); slm.next(it)) { 333 0 yongsun slm.getIdString(it, history); 334 0 yongsun if (history.size() == 0) { 335 0 yongsun slm.findBackOffState(lvl, NULL, bol, bon); 336 0 yongsun } else { 337 0 yongsun slm.findBackOffState(lvl, &history[0], bol, bon); 338 0 yongsun } 339 0 yongsun 340 0 yongsun CSIMSlm::TNode* pn = (CSIMSlm::TNode*)slm.getNodePtr(it); 341 0 yongsun CThreadSlm::TNode& nn = levels[lvl][it.m_history.back()]; 342 0 yongsun 343 0 yongsun std::map<float, int>::iterator prit = pr_map.find(pn->pr); 344 0 yongsun if (prit == pr_map.end()) { // This would be cause by precision error 345 0 yongsun double val = EffectivePr(pn->pr); 346 0 yongsun val = OriginalPr(val); 347 0 yongsun prit = pr_map.find(val); 348 0 yongsun assert(prit != pr_map.end()); 349 0 yongsun } 350 0 yongsun int idx_pr = prit->second; 351 0 yongsun nn.set_pr(idx_pr); 352 0 yongsun 353 0 yongsun nn.set_wid(pn->id); 354 0 yongsun nn.set_bon(bon); 355 0 yongsun nn.set_bol(bol); 356 0 yongsun 357 0 yongsun std::map<float, int>::iterator bowit = bow_map.find(pn->bow); 358 0 yongsun if (bowit == bow_map.end()) { // precision error 359 0 yongsun double val = EffectiveBow(pn->bow); 360 0 yongsun val = OriginalBow(val); 361 0 yongsun bowit = bow_map.find(val); 362 0 yongsun assert(bowit != bow_map.end()); 363 0 yongsun } 364 0 yongsun int idx_bow = bowit->second; 365 0 yongsun nn.set_bow(idx_bow); 366 0 yongsun 367 0 yongsun nn.set_ch(pn->child); 368 0 yongsun 369 0 yongsun assert(usingLogPr || (pr_table[idx_pr] > 0.0 && pr_table[idx_pr] < 1.0)); 370 0 yongsun assert(!usingLogPr || pr_table[idx_pr] > 0.0); 371 0 yongsun } 372 0 yongsun CSIMSlm::TNode* pn = (CSIMSlm::TNode*)slm.getNodePtr(it); 373 0 yongsun CThreadSlm::TNode& nn = levels[lvl][it.m_history.back()]; 374 0 yongsun nn.set_ch(pn->child); 375 0 yongsun }; 376 0 yongsun 377 0 yongsun 378 0 yongsun lastLevel = new CThreadSlm::TLeaf [slm.getLevelSize(slm.getN())]; 379 0 yongsun CSIMSlmWithIteration::TLevelIterator it; 380 0 yongsun slm.beginLevelIteration(slm.getN(), it); 381 0 yongsun for (int lvl=slm.getN(); !slm.isEnd(it); slm.next(it)) { 382 0 yongsun CSIMSlm::TLeaf* pn = slm.getNodePtr(it); 383 0 yongsun slm.getIdString(it, history); 384 0 yongsun slm.findBackOffState(lvl, &history[0], bol, bon); 385 0 yongsun 386 0 yongsun CThreadSlm::TLeaf& nn = lastLevel[it.m_history.back()]; 387 0 yongsun 388 0 yongsun std::map<float, int>::iterator prit = pr_map.find(pn->pr); 389 0 yongsun if (prit == pr_map.end()) { // This would be cause by precision error 390 0 yongsun double val = EffectivePr(pn->pr); 391 0 yongsun val = OriginalPr(val); 392 0 yongsun prit = pr_map.find(val); 393 0 yongsun assert(prit != pr_map.end()); 394 0 yongsun } 395 0 yongsun int idx_pr = prit->second; 396 0 yongsun nn.set_pr(idx_pr); 397 0 yongsun 398 0 yongsun nn.set_wid(pn->id); 399 0 yongsun nn.set_bon(bon); 400 0 yongsun nn.set_bol(bol); 401 0 yongsun } 402 0 yongsun 403 0 yongsun 404 0 yongsun printf("\nWriting out..."); fflush(stdout); 405 0 yongsun 406 0 yongsun float dummy = 0.0; 407 0 yongsun fp = fopen(argv[2], "wb"); 408 0 yongsun int N = slm.getN(); 409 0 yongsun fwrite(&N, sizeof(int), 1, fp); 410 0 yongsun unsigned ulp = slm.isUseLogPr(); 411 0 yongsun fwrite(&ulp, sizeof(unsigned), 1, fp); 412 0 yongsun 413 0 yongsun for (int lvl = 0; lvl <= N; ++lvl) { 414 0 yongsun int len = slm.getLevelSize(lvl); 415 0 yongsun fwrite(&len, sizeof(int), 1, fp); 416 0 yongsun } 417 0 yongsun fwrite(&pr_table[0], sizeof(float), pr_table.size(), fp); 418 0 yongsun for (int i = pr_table.size(), sz=(1 << CThreadSlm::BITS_PR); i < sz; ++i) 419 0 yongsun fwrite(&dummy, sizeof(float), 1, fp); 420 0 yongsun 421 0 yongsun fwrite(&bow_table[0], sizeof(float), bow_table.size(), fp); 422 0 yongsun for (int i = bow_table.size(), sz=(1 << CThreadSlm::BITS_BOW); i < sz; ++i) 423 0 yongsun fwrite(&dummy, sizeof(float), 1, fp); 424 0 yongsun 425 0 yongsun for (int lvl=0; lvl < N; ++lvl) 426 0 yongsun fwrite(levels[lvl], sizeof(CThreadSlm::TNode), slm.getLevelSize(lvl), fp); 427 0 yongsun fwrite(lastLevel, sizeof(CThreadSlm::TLeaf), slm.getLevelSize(N), fp); 428 0 yongsun fclose(fp); 429 0 yongsun 430 0 yongsun printf("done!\n"); fflush(stdout); 431 0 yongsun 432 0 yongsun delete [] lastLevel; 433 0 yongsun for (int lvl=0; lvl < N; ++lvl) 434 0 yongsun delete []levels[lvl]; 435 0 yongsun 436 0 yongsun bow_values.clear(); 437 0 yongsun bow_map.clear(); 438 0 yongsun bow_table.clear(); 439 0 yongsun 440 0 yongsun pr_values.clear(); 441 0 yongsun pr_map.clear(); 442 0 yongsun pr_table.clear(); 443 0 yongsun 444 0 yongsun slm.Free(); 445 0 yongsun 446 0 yongsun 447 0 yongsun return 0; 448 0 yongsun } 449