1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. 5 * 6 * The contents of this file are subject to the terms of either the GNU Lesser 7 * General Public License Version 2.1 only ("LGPL") or the Common Development and 8 * Distribution License ("CDDL")(collectively, the "License"). You may not use this 9 * file except in compliance with the License. You can obtain a copy of the CDDL at 10 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at 11 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the 12 * specific language governing permissions and limitations under the License. When 13 * distributing the software, include this License Header Notice in each file and 14 * include the full text of the License in the License file as well as the 15 * following notice: 16 * 17 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE 18 * (CDDL) 19 * For Covered Software in this distribution, this License shall be governed by the 20 * laws of the State of California (excluding conflict-of-law provisions). 21 * Any litigation relating to this License shall be subject to the jurisdiction of 22 * the Federal Courts of the Northern District of California and the state courts 23 * of the State of California, with venue lying in Santa Clara County, California. 24 * 25 * Contributor(s): 26 * 27 * If you wish your version of this file to be governed by only the CDDL or only 28 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to 29 * include this software in this distribution under the [CDDL or LGPL Version 2.1] 30 * license." If you don't indicate a single choice of license, a recipient has the 31 * option to distribute your version of this file under either the CDDL or the LGPL 32 * Version 2.1, or to extend the choice of license to its licensees as provided 33 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL 34 * Version 2 license, then the option applies only if the new code is made subject 35 * to such option by the copyright holder. 36 */ 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 41 #include "sim_dict.h" 42 43 44 void CSIMDict::freeSubTree(CSIMDict::TState& root) 45 { 46 if (root.follow != NULL) { 47 Map_Type &map = *(root.follow); 48 for (Map_Type::iterator it=map.begin(), last=map.end(); it != last; ++it) 49 freeSubTree(it->second); 50 delete root.follow; 51 root.follow = NULL; 52 } 53 } 54 55 const CSIMDict::TState* CSIMDict::step(const CSIMDict::TState* root, TWCHAR wch) 56 { 57 if ((root != NULL) && (root->follow != NULL) && wch != WCH_NULL) { 58 Map_Type::iterator it = root->follow->find(TSIMChar(wch)); 59 if (it != root->follow->end()) 60 return &(it->second); 61 } 62 return NULL; 63 } 64 65 int CSIMDict::matchLongest(const CSIMDict::TState* root, CSIMDict::PState & result, const TWCHAR* str) 66 { 67 int lastWordLen = 0, len = 0; 68 result = root; 69 while (root != NULL) { 70 if (root->word_id != SIM_ID_NOT_WORD) { 71 result = root; 72 lastWordLen = len; 73 } 74 ++len; 75 root = step(root, *str++); 76 } 77 return lastWordLen; 78 } 79 80 bool 81 CSIMDict::parseText(const char* filename) 82 { 83 FILE * fp = NULL; 84 static char buf[1024]; 85 static TWCHAR wword[sizeof(buf)]; 86 unsigned int id; 87 88 try { 89 if ((fp = fopen(filename, "r")) == NULL) 90 return false; 91 while (fgets(buf, 1024, fp) != NULL) { 92 if (*buf == '\n' || *buf == '#') 93 continue; 94 95 char* p = buf; 96 while (*p == ' ' || *p == '\t') 97 ++p; 98 char* pstart = p; 99 while (*p != 0 && *p != ' ' && *p != '\t') 100 ++p; 101 if (*p == 0) 102 continue; 103 *p++ = 0; 104 while (*p == ' ' || *p == '\t') 105 ++p; 106 if (!(*p >= '0' && *p <= '9')) continue; 107 for (id=0; *p >= '0' && *p <= '9'; ++p) 108 id = 10*id + (*p - '0'); 109 110 if (id < SIM_ID_REALWORD_START) 111 continue; 112 if (MBSTOWCS(wword, pstart, sizeof(buf)) != (size_t)-1) { 113 insertWord(wword, TSIMWordId(id)); 114 } else { 115 fprintf(stderr, "mbs to wcs conversion error for : %s %d\n", buf, id); 116 exit(100); 117 } 118 } 119 fclose(fp); 120 } catch (...) { 121 if (fp != NULL) 122 fclose(fp); 123 buf[sizeof(buf)-1] = 0; 124 fprintf(stderr, "Catch exception when loading dictionary at %s, existing...", buf); 125 exit(200); 126 } 127 return true; 128 } 129 130 void CSIMDict::insertWord(const TWCHAR* wstr, TSIMWordId id) 131 { 132 TState* ps = &m_root; 133 while (*wstr) { 134 TSIMChar ch(*wstr++); 135 TSIMWordId nodeId = (*wstr)?SIM_ID_NOT_WORD:id; 136 if (ps->follow == NULL) { 137 ps->follow = new Map_Type(); 138 } 139 Map_Type & map = *(ps->follow); 140 Map_Type::iterator it = map.find(ch); 141 if (it != map.end() && nodeId != SIM_ID_NOT_WORD && 142 it->second.word_id != SIM_ID_NOT_WORD && it->second.word_id != nodeId) { 143 throw new int(100); 144 } 145 if (it != map.end()){ 146 if (nodeId != SIM_ID_NOT_WORD) 147 it->second.word_id = nodeId; 148 ps = &(it->second); 149 } else { 150 ps = &(map[ch] = TState(nodeId)); 151 } 152 } 153 } 154 155 void CSIMDict::InnerPrint(FILE* fp, wstring & wstr, const TState* pnode) 156 { 157 if (pnode != NULL && pnode->word_id != SIM_ID_NOT_WORD) { 158 char* buf = new char[wstr.size()*2+2]; 159 WCSTOMBS(buf, wstr.c_str(), wstr.size()*2+2); 160 fprintf(fp, "%s %d\n", buf, unsigned(pnode->word_id)); 161 delete[] buf; 162 } 163 if (pnode != NULL && pnode->follow != NULL) { 164 Map_Type::iterator it, ite = pnode->follow->end(); 165 for (it = pnode->follow->begin(); it != ite; ++it) { 166 TWCHAR wch = TWCHAR(it->first); 167 wstr.push_back(wch); 168 InnerPrint(fp, wstr, &(it->second)); 169 wstr.erase(wstr.size()-1, 1); 170 } 171 } 172 } 173