Home | History | Annotate | Download | only in thread
      1 /*
      2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
      3  *
      4  * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
      5  *
      6  * The contents of this file are subject to the terms of either the GNU Lesser
      7  * General Public License Version 2.1 only ("LGPL") or the Common Development and
      8  * Distribution License ("CDDL")(collectively, the "License"). You may not use this
      9  * file except in compliance with the License. You can obtain a copy of the CDDL at
     10  * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
     11  * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
     12  * specific language governing permissions and limitations under the License. When
     13  * distributing the software, include this License Header Notice in each file and
     14  * include the full text of the License in the License file as well as the
     15  * following notice:
     16  *
     17  * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
     18  * (CDDL)
     19  * For Covered Software in this distribution, this License shall be governed by the
     20  * laws of the State of California (excluding conflict-of-law provisions).
     21  * Any litigation relating to this License shall be subject to the jurisdiction of
     22  * the Federal Courts of the Northern District of California and the state courts
     23  * of the State of California, with venue lying in Santa Clara County, California.
     24  *
     25  * Contributor(s):
     26  *
     27  * If you wish your version of this file to be governed by only the CDDL or only
     28  * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
     29  * include this software in this distribution under the [CDDL or LGPL Version 2.1]
     30  * license." If you don't indicate a single choice of license, a recipient has the
     31  * option to distribute your version of this file under either the CDDL or the LGPL
     32  * Version 2.1, or to extend the choice of license to its licensees as provided
     33  * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
     34  * Version 2 license, then the option applies only if the new code is made subject
     35  * to such option by the copyright holder.
     36  */
     37 
     38 #ifdef HAVE_CONFIG_H
     39 #include "config.h"
     40 #endif
     41 
     42 #ifdef HAVE_ASSERT_H
     43 #include <assert.h>
     44 #endif
     45 
     46 #include <stdio.h>
     47 #include <unistd.h>
     48 #include <stdlib.h>
     49 
     50 #include <vector>
     51 #include <map>
     52 #include <math.h>
     53 
     54 #include "../sim_slm.h"
     55 #include "../slm.h"
     56 
     57 #include "ValueCompress.h"
     58 
     59 class CSIMSlmWithIteration : public CSIMSlm{
     60 public:
     61     struct TLevelIterator {
     62         std::vector<int>    m_history;
     63     };
     64 
     65 public:
     66     int
     67     getLevelSize(int lvl) { return sz[lvl]; }
     68 
     69     int
     70     getN() { return N; }
     71 
     72     void
     73     getIdString(TLevelIterator& it, std::vector<TSIMWordId>& history);
     74 
     75     void
     76     beginLevelIteration(int lvl, TLevelIterator& it);
     77 
     78     void
     79     next(TLevelIterator& it);
     80 
     81     bool
     82     isEnd(TLevelIterator& it);
     83 
     84     TLeaf*
     85     getNodePtr(TLevelIterator& it);
     86 
     87     int
     88     findState(int n, TSIMWordId*hw);
     89 
     90     void
     91     findBackOffState(int n, TSIMWordId*hw, unsigned & bol, unsigned& bon);
     92 
     93 
     94 protected:
     95     void
     96     adjustIterator(TLevelIterator& it);
     97 };
     98 
     99 int
    100 CSIMSlmWithIteration::findState(int n, TSIMWordId*hw)
    101 {
    102     if (n == 0) return 0;
    103 
    104     int m = -1;
    105         while (n > N) { --n; ++hw; }
    106 
    107         void* pstate = ((TNode*)level[0]);
    108         for (int lvl=0; lvl < n && pstate != NULL; ++lvl) {
    109             int h = ((TNode*)pstate)->child;
    110             int t = (((TNode*)pstate)+1)->child;
    111             if (lvl == N-1) {
    112                 TLeaf* p = (TLeaf*)level[lvl+1];
    113                 pstate = (void*)binary_find_id(p+h, p+t, hw[lvl]);
    114                 m = (pstate != NULL)?(((TLeaf*)pstate) - p):(-1);
    115             } else {
    116                 TNode* p = (TNode*)level[lvl+1];
    117                 pstate = (void*)binary_find_id(p+h, p+t, hw[lvl]);
    118                 m = (pstate != NULL)?(((TNode*)pstate) - p):(-1);
    119             }
    120         }
    121     return m;
    122 }
    123 
    124 void
    125 CSIMSlmWithIteration::findBackOffState(int n, TSIMWordId*hw, unsigned & bol, unsigned& bon)
    126 {
    127     while (n > 1) {
    128         --n; ++hw;
    129         int idx = findState(n, hw);
    130         if (idx >= 0 && ((TNode*)(level[n]))[idx].child < ((TNode*)(level[n]))[idx+1].child) {
    131             bol = n; bon = idx; return;
    132         }
    133     }
    134     bol = bon = 0;
    135     return;
    136 }
    137 
    138 void
    139 CSIMSlmWithIteration::getIdString(TLevelIterator& it, std::vector<TSIMWordId>& history)
    140 {
    141     history.clear();
    142     for (int i=1, tmp_sz=it.m_history.size(); i < tmp_sz; ++i) {
    143         int idx = it.m_history[i];
    144         if (i == N)
    145             history.push_back(((TLeaf*)(level[i]))[idx].id);
    146         else
    147             history.push_back(((TNode*)(level[i]))[idx].id);
    148     }
    149 }
    150 
    151 void
    152 CSIMSlmWithIteration::beginLevelIteration(int lvl, TLevelIterator& it)
    153 {
    154     it.m_history.clear();
    155     for (int i=0, tmp_sz=lvl; i <= tmp_sz; ++i)
    156         it.m_history.push_back(0);
    157     adjustIterator(it);
    158 }
    159 
    160 void
    161 CSIMSlmWithIteration::next(TLevelIterator& it)
    162 {
    163     ++(it.m_history.back());
    164     adjustIterator(it);
    165 }
    166 
    167 bool
    168 CSIMSlmWithIteration::isEnd(TLevelIterator& it)
    169 {
    170     return ((it.m_history.back()+1 >= sz[it.m_history.size()-1]));
    171 }
    172 
    173 void
    174 CSIMSlmWithIteration::adjustIterator(TLevelIterator& it)
    175 {
    176     int ch = it.m_history.back();
    177     for (int i= it.m_history.size()-2; i >= 0; --i) {
    178         int len = sz[i];
    179         int& parent = it.m_history[i];
    180         TNode* pn = (TNode*)(level[i]);
    181         while (parent < len && pn[parent+1].child <= ch)
    182             ++parent;
    183         ch = parent;
    184     }
    185 }
    186 
    187 CSIMSlm::TLeaf*
    188 CSIMSlmWithIteration::getNodePtr(TLevelIterator& it)
    189 {
    190     int lvl = it.m_history.size()-1;
    191     int idx = it.m_history.back();
    192     if (lvl == N)
    193         return (((TLeaf*)(level[lvl]))+idx);
    194     else
    195         return (((TNode*)(level[lvl]))+idx);
    196 }
    197 
    198 
    199 
    200 void ShowUsage()
    201 {
    202     printf("Usage:\n");
    203     printf("    slmthread primitive_slm threaded_slm\n");
    204     printf("\nDescription:\n");
    205     printf("    slmthread add back-off-state for each slm node in the primitive_slm. ");
    206     printf("Also it compresses 32-bit float into 16 bit representation.\n\n");
    207     exit(100);
    208 }
    209 
    210 FILE* fp = NULL;
    211 CThreadSlm::TNode* levels[16];
    212 CThreadSlm::TLeaf* lastLevel;
    213 
    214 int main(int argc, char* argv[])
    215 {
    216     CValueCompressor vc;
    217     unsigned int bol, bon;
    218     CSIMSlmWithIteration slm;
    219     std::vector<TSIMWordId> history;
    220     float real_pr, eff_pr, real_bow, eff_bow;
    221 
    222     std::map<float, float> pr_eff, bow_eff;     // effval --> val
    223     std::map<float, int> pr_values, bow_values; // effval --> freq
    224     std::map<float, int> pr_map, bow_map;       // result: val --> int
    225     std::vector<float>   pr_table, bow_table;   // result: val vector
    226     std::vector<float>::iterator itt, itte;
    227 
    228     if (argc != 3)
    229         ShowUsage();
    230 
    231     printf("Loading original slm..."); fflush(stdout);
    232     if (slm.Load(argv[1]) == false)
    233         ShowUsage();
    234 
    235     bool usingLogPr = slm.isUseLogPr();
    236 
    237     #define EffectivePr(a)  (float((usingLogPr)?((a)/log(2.0)):(-log2((a)))))
    238     #define OriginalPr(b)   (float((usingLogPr)?((b)*log(2.0)):(exp2(-(b)))))
    239     #define EffectiveBow(a) (float((usingLogPr)?(exp(-(a))):((a))))
    240     #define OriginalBow(b)  (float((usingLogPr)?(-log((b))):((b))))
    241 
    242     printf("\nfirst pass..."); fflush(stdout);
    243     for (int lvl=0; lvl <= slm.getN(); ++lvl) {
    244         CSIMSlmWithIteration::TLevelIterator it;
    245         slm.beginLevelIteration(lvl, it);
    246         for (; !slm.isEnd(it); slm.next(it)) {
    247             CSIMSlm::TLeaf* pl = slm.getNodePtr(it);
    248             real_pr = pl->pr;
    249             eff_pr = EffectivePr(real_pr);
    250             if (pr_eff.find(eff_pr) == pr_eff.end()) {
    251                 pr_eff[eff_pr] = real_pr;
    252             } else { // precision error cause non 1:1 mapping
    253                 pr_eff[eff_pr] = OriginalPr(eff_pr);
    254             }
    255             ++(pr_values[eff_pr]);
    256             if (lvl < slm.getN()) {
    257                 real_bow = ((CSIMSlm::TNode*)pl)->bow;
    258                 eff_bow = EffectiveBow(real_bow);
    259                 if (bow_eff.find(eff_bow) == bow_eff.end()) {
    260                     bow_eff[eff_bow] = real_bow;
    261                 } else { // two values map to same distance value due to precision error
    262                     bow_eff[eff_bow] = OriginalBow(eff_bow);
    263                 }
    264                 ++(bow_values[eff_bow]);
    265             }
    266         }
    267     }
    268 
    269     // Following pr value should not be grouped, or as milestone values.
    270     static float msprs[] = {
    271         0.9, 0.8, 0.7, 0.6,
    272         1.0/2, 1.0/4, 1.0/8, 1.0/16, 1.0/32, 1.0/64, 1.0/128,
    273         1.0/256, 1.0/512, 1.0/1024, 1.0/2048, 1.0/4096, 1.0/8192,
    274         1.0/16384, 1.0/32768, 1.0/65536
    275     };
    276 
    277     for (unsigned i=0, sz=sizeof(msprs)/sizeof(float); i < sz; ++i) {
    278         float real_pr = (usingLogPr)?(-log(msprs[i])):(msprs[i]);
    279         float eff_pr = EffectivePr(real_pr);
    280         if (pr_eff.find(eff_pr) == pr_eff.end()) {
    281             pr_eff[eff_pr] = real_pr;
    282         } else { // precision error cause non 1:1 mapping
    283             pr_eff[eff_pr] = OriginalPr(eff_pr);
    284         }
    285         pr_values[eff_pr] = 0;
    286     }
    287 
    288     // Following bow value should not be grouped, or as milestone values.
    289     static float msbows[] = {
    290         1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,
    291         0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001,
    292         0.00005, 0.00001, 0.000005, 0.000001, 0.0000005, 0.0000001
    293     };
    294 
    295     for (unsigned i=0, sz=sizeof(msbows)/sizeof(float); i < sz; ++i) {
    296         float real_bow = (usingLogPr)?(-log(msbows[i])):(msbows[i]);
    297         float eff_bow = EffectiveBow(real_bow);
    298         if (bow_eff.find(eff_bow) == bow_eff.end()) {
    299             bow_eff[eff_bow] = real_bow;
    300         } else { // two values map to same distance value due to precision error
    301             bow_eff[eff_bow] = OriginalBow(eff_bow);
    302         }
    303         bow_values[eff_bow] = 0;
    304     }
    305 
    306     printf("\nCompressing pr values..."); fflush(stdout);
    307     vc(pr_eff, pr_values, pr_map, pr_table, (1 << CThreadSlm::BITS_PR));
    308     pr_values.clear();
    309     itte = pr_table.end();
    310     for (itt = pr_table.begin(); itt != itte; ++itt) {
    311         *itt = OriginalPr(*itt);
    312         assert(usingLogPr || (*itt > 0.0 && *itt < 1.0));
    313         assert(!usingLogPr || *itt > 0.0);
    314     }
    315     printf("%d float values ==> %d values", pr_eff.size(), pr_table.size());
    316 
    317     printf("\nCompressing bow values..."); fflush(stdout);
    318     vc(bow_eff, bow_values, bow_map, bow_table, (1 << CThreadSlm::BITS_BOW));
    319     bow_values.clear();
    320     itte = bow_table.end();
    321     for (itt = bow_table.begin(); itt != itte; ++itt)
    322         *itt = OriginalBow(*itt);
    323     printf("%d float values ==> %d values", bow_eff.size(), bow_table.size());
    324 
    325 
    326     printf("\nThreading the new model..."); fflush(stdout);
    327     for (int lvl=0; lvl < slm.getN(); ++lvl) {
    328         levels[lvl] = new CThreadSlm::TNode[slm.getLevelSize(lvl)];
    329 
    330         CSIMSlmWithIteration::TLevelIterator it;
    331         slm.beginLevelIteration(lvl, it);
    332         for (; !slm.isEnd(it); slm.next(it)) {
    333             slm.getIdString(it, history);
    334             if (history.size() == 0) {
    335                 slm.findBackOffState(lvl, NULL, bol, bon);
    336             } else {
    337                 slm.findBackOffState(lvl, &history[0], bol, bon);
    338             }
    339 
    340             CSIMSlm::TNode* pn = (CSIMSlm::TNode*)slm.getNodePtr(it);
    341             CThreadSlm::TNode& nn = levels[lvl][it.m_history.back()];
    342 
    343             std::map<float, int>::iterator prit = pr_map.find(pn->pr);
    344             if (prit == pr_map.end()) { // This would be cause by precision error
    345                 double val = EffectivePr(pn->pr);
    346                 val = OriginalPr(val);
    347                 prit = pr_map.find(val);
    348                 assert(prit != pr_map.end());
    349             }
    350             int idx_pr = prit->second;
    351             nn.set_pr(idx_pr);
    352 
    353             nn.set_wid(pn->id);
    354             nn.set_bon(bon);
    355             nn.set_bol(bol);
    356 
    357             std::map<float, int>::iterator bowit = bow_map.find(pn->bow);
    358             if (bowit == bow_map.end()) { // precision error
    359                 double val = EffectiveBow(pn->bow);
    360                 val = OriginalBow(val);
    361                 bowit = bow_map.find(val);
    362                 assert(bowit != bow_map.end());
    363             }
    364             int idx_bow = bowit->second;
    365             nn.set_bow(idx_bow);
    366 
    367             nn.set_ch(pn->child);
    368 
    369             assert(usingLogPr || (pr_table[idx_pr] > 0.0 && pr_table[idx_pr] < 1.0));
    370             assert(!usingLogPr || pr_table[idx_pr] > 0.0);
    371         }
    372         CSIMSlm::TNode* pn = (CSIMSlm::TNode*)slm.getNodePtr(it);
    373         CThreadSlm::TNode& nn = levels[lvl][it.m_history.back()];
    374         nn.set_ch(pn->child);
    375     };
    376 
    377 
    378     lastLevel = new CThreadSlm::TLeaf [slm.getLevelSize(slm.getN())];
    379     CSIMSlmWithIteration::TLevelIterator it;
    380     slm.beginLevelIteration(slm.getN(), it);
    381     for (int lvl=slm.getN(); !slm.isEnd(it); slm.next(it)) {
    382         CSIMSlm::TLeaf* pn = slm.getNodePtr(it);
    383         slm.getIdString(it, history);
    384         slm.findBackOffState(lvl, &history[0], bol, bon);
    385 
    386         CThreadSlm::TLeaf& nn = lastLevel[it.m_history.back()];
    387 
    388         std::map<float, int>::iterator prit = pr_map.find(pn->pr);
    389         if (prit == pr_map.end()) { // This would be cause by precision error
    390             double val = EffectivePr(pn->pr);
    391             val = OriginalPr(val);
    392             prit = pr_map.find(val);
    393             assert(prit != pr_map.end());
    394         }
    395         int idx_pr = prit->second;
    396         nn.set_pr(idx_pr);
    397 
    398         nn.set_wid(pn->id);
    399         nn.set_bon(bon);
    400         nn.set_bol(bol);
    401     }
    402 
    403 
    404     printf("\nWriting out..."); fflush(stdout);
    405 
    406     float dummy = 0.0;
    407     fp = fopen(argv[2], "wb");
    408     int N = slm.getN();
    409     fwrite(&N, sizeof(int), 1, fp);
    410     unsigned ulp = slm.isUseLogPr();
    411     fwrite(&ulp, sizeof(unsigned), 1, fp);
    412 
    413     for (int lvl = 0; lvl <= N; ++lvl) {
    414         int len = slm.getLevelSize(lvl);
    415         fwrite(&len, sizeof(int), 1, fp);
    416     }
    417     fwrite(&pr_table[0], sizeof(float), pr_table.size(), fp);
    418     for (int i = pr_table.size(), sz=(1 << CThreadSlm::BITS_PR); i < sz; ++i)
    419         fwrite(&dummy, sizeof(float), 1, fp);
    420 
    421     fwrite(&bow_table[0], sizeof(float), bow_table.size(), fp);
    422     for (int i = bow_table.size(), sz=(1 << CThreadSlm::BITS_BOW); i < sz; ++i)
    423         fwrite(&dummy, sizeof(float), 1, fp);
    424 
    425     for (int lvl=0; lvl < N; ++lvl)
    426         fwrite(levels[lvl], sizeof(CThreadSlm::TNode), slm.getLevelSize(lvl), fp);
    427     fwrite(lastLevel, sizeof(CThreadSlm::TLeaf), slm.getLevelSize(N), fp);
    428     fclose(fp);
    429 
    430     printf("done!\n"); fflush(stdout);
    431 
    432     delete [] lastLevel;
    433     for (int lvl=0; lvl < N; ++lvl)
    434         delete []levels[lvl];
    435 
    436     bow_values.clear();
    437     bow_map.clear();
    438     bow_table.clear();
    439 
    440     pr_values.clear();
    441     pr_map.clear();
    442     pr_table.clear();
    443 
    444     slm.Free();
    445 
    446 
    447     return 0;
    448 }
    449