• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

development


Commit MetaInfo

Revision: b68c6b1cbec0aa6556a489e49949da6d8790beaf (tree)
Zeit: 2010-12-11 02:07:56
Autor: Ken Wakasa <kwakasa@goog...>
Committer: Ken Wakasa

Log Message

Move tools/makedict from platform/development to platform/packages/inputmethods/LatinIME

The corresponding change is I01ef7084

Change-Id: I559207ab75feffe5ef4678c4a85f178a024448c5

Zusammenfassung der Änderungen

  • delete: tools/makedict/Android.mk
  • delete: tools/makedict/etc/Android.mk
  • delete: tools/makedict/etc/makedict
  • delete: tools/makedict/etc/manifest.txt
  • delete: tools/makedict/src/com/android/tools/dict/BigramDictionary.java
  • delete: tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java

Diff

--- a/tools/makedict/Android.mk
+++ /dev/null
@@ -1,24 +0,0 @@
1-#
2-# Copyright (C) 2009 The Android Open Source Project
3-#
4-# Licensed under the Apache License, Version 2.0 (the "License");
5-# you may not use this file except in compliance with the License.
6-# You may obtain a copy of the License at
7-#
8-# http://www.apache.org/licenses/LICENSE-2.0
9-#
10-# Unless required by applicable law or agreed to in writing, software
11-# distributed under the License is distributed on an "AS IS" BASIS,
12-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-# See the License for the specific language governing permissions and
14-# limitations under the License.
15-#
16-LOCAL_PATH := $(call my-dir)
17-include $(CLEAR_VARS)
18-
19-LOCAL_SRC_FILES := $(call all-java-files-under,src)
20-LOCAL_JAR_MANIFEST := etc/manifest.txt
21-LOCAL_MODULE := makedict
22-
23-include $(BUILD_HOST_JAVA_LIBRARY)
24-include $(LOCAL_PATH)/etc/Android.mk
--- a/tools/makedict/etc/Android.mk
+++ /dev/null
@@ -1,20 +0,0 @@
1-# Copyright (C) 2009 The Android Open Source Project
2-#
3-# Licensed under the Apache License, Version 2.0 (the "License");
4-# you may not use this file except in compliance with the License.
5-# You may obtain a copy of the License at
6-#
7-# http://www.apache.org/licenses/LICENSE-2.0
8-#
9-# Unless required by applicable law or agreed to in writing, software
10-# distributed under the License is distributed on an "AS IS" BASIS,
11-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-# See the License for the specific language governing permissions and
13-# limitations under the License.
14-
15-LOCAL_PATH := $(call my-dir)
16-include $(CLEAR_VARS)
17-
18-LOCAL_PREBUILT_EXECUTABLES := makedict
19-include $(BUILD_HOST_PREBUILT)
20-
--- a/tools/makedict/etc/makedict
+++ /dev/null
@@ -1,63 +0,0 @@
1-#!/bin/sh
2-# Copyright 2009, The Android Open Source Project
3-#
4-# Licensed under the Apache License, Version 2.0 (the "License");
5-# you may not use this file except in compliance with the License.
6-# You may obtain a copy of the License at
7-#
8-# http://www.apache.org/licenses/LICENSE-2.0
9-#
10-# Unless required by applicable law or agreed to in writing, software
11-# distributed under the License is distributed on an "AS IS" BASIS,
12-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-# See the License for the specific language governing permissions and
14-# limitations under the License.
15-
16-# Set up prog to be the path of this script, including following symlinks,
17-# and set up progdir to be the fully-qualified pathname of its directory.
18-prog="$0"
19-while [ -h "${prog}" ]; do
20- newProg=`/bin/ls -ld "${prog}"`
21- newProg=`expr "${newProg}" : ".* -> \(.*\)$"`
22- if expr "x${newProg}" : 'x/' >/dev/null; then
23- prog="${newProg}"
24- else
25- progdir=`dirname "${prog}"`
26- prog="${progdir}/${newProg}"
27- fi
28-done
29-oldwd=`pwd`
30-progdir=`dirname "${prog}"`
31-cd "${progdir}"
32-progdir=`pwd`
33-prog="${progdir}"/`basename "${prog}"`
34-cd "${oldwd}"
35-
36-jarfile=makedict.jar
37-frameworkdir="$progdir"
38-if [ ! -r "$frameworkdir/$jarfile" ]
39-then
40- frameworkdir=`dirname "$progdir"`/tools/lib
41- libdir=`dirname "$progdir"`/tools/lib
42-fi
43-if [ ! -r "$frameworkdir/$jarfile" ]
44-then
45- frameworkdir=`dirname "$progdir"`/framework
46- libdir=`dirname "$progdir"`/lib
47-fi
48-if [ ! -r "$frameworkdir/$jarfile" ]
49-then
50- echo `basename "$prog"`": can't find $jarfile"
51- exit 1
52-fi
53-
54-if [ "$OSTYPE" = "cygwin" ] ; then
55- jarpath=`cygpath -w "$frameworkdir/$jarfile"`
56- progdir=`cygpath -w "$progdir"`
57-else
58- jarpath="$frameworkdir/$jarfile"
59-fi
60-
61-# need to use "java.ext.dirs" because "-jar" causes classpath to be ignored
62-# might need more memory, e.g. -Xmx128M
63-exec java -Djava.ext.dirs="$frameworkdir" -jar "$jarpath" "$@"
--- a/tools/makedict/etc/manifest.txt
+++ /dev/null
@@ -1 +0,0 @@
1-Main-Class: com.android.tools.dict.MakeBinaryDictionary
--- a/tools/makedict/src/com/android/tools/dict/BigramDictionary.java
+++ /dev/null
@@ -1,286 +0,0 @@
1-/*
2- * Copyright (C) 2010 The Android Open Source Project
3- *
4- * Licensed under the Apache License, Version 2.0 (the "License");
5- * you may not use this file except in compliance with the License.
6- * You may obtain a copy of the License at
7- *
8- * http://www.apache.org/licenses/LICENSE-2.0
9- *
10- * Unless required by applicable law or agreed to in writing, software
11- * distributed under the License is distributed on an "AS IS" BASIS,
12- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13- * See the License for the specific language governing permissions and
14- * limitations under the License.
15- */
16-
17-package com.android.tools.dict;
18-
19-import org.xml.sax.Attributes;
20-import org.xml.sax.helpers.DefaultHandler;
21-
22-import java.io.File;
23-import java.util.ArrayList;
24-import java.util.HashMap;
25-import java.util.Map;
26-import java.util.Set;
27-
28-import javax.xml.parsers.SAXParser;
29-import javax.xml.parsers.SAXParserFactory;
30-
31-/**
32- * Helper for MakeBinaryDictionary
33- * Deals with all the bigram data
34- */
35-public class BigramDictionary {
36-
37- /*
38- * Must match the values in the client side which is located in dictionary.cpp & dictionary.h
39- * Changing these values will generate totally different structure which must be also reflected
40- * on the client side.
41- */
42- public static final int FLAG_BIGRAM_READ = 0x80;
43- public static final int FLAG_BIGRAM_CHILDEXIST = 0x40;
44- public static final int FLAG_BIGRAM_CONTINUED = 0x80;
45- public static final int FLAG_BIGRAM_FREQ = 0x7F;
46-
47- public static final int FOR_REVERSE_LOOKUPALL = -99;
48-
49- public ArrayList<String> mBigramToFill = new ArrayList<String>();
50- public ArrayList<Integer> mBigramToFillAddress = new ArrayList<Integer>();
51-
52- public HashMap<String, Bigram> mBi;
53-
54- public boolean mHasBigram;
55-
56- public BigramDictionary(String bigramSrcFilename, boolean hasBigram) {
57- mHasBigram = hasBigram;
58- loadBigram(bigramSrcFilename);
59- }
60-
61- private void loadBigram(String filename) {
62- mBi = new HashMap<String, Bigram>();
63- if (!mHasBigram) {
64- System.out.println("Number of bigrams = " + Bigram.sBigramNum);
65- return;
66- }
67- try {
68- SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
69- parser.parse(new File(filename), new DefaultHandler() {
70- String w1 = null;
71- boolean inWord1 = false;
72- boolean inWord2 = false;
73- int freq = 0, counter = 0;
74- Bigram tempBigram = null;
75-
76- @Override
77- public void startElement(String uri, String localName,
78- String qName, Attributes attributes) {
79- if (qName.equals("bi")) {
80- inWord1 = true;
81- w1 = attributes.getValue(0);
82- int count = Integer.parseInt(attributes.getValue(1));
83- tempBigram = new Bigram(count);
84- counter = 0;
85- } else if (qName.equals("w")) {
86- inWord2 = true;
87- String word2 = attributes.getValue(0);
88- int freq = Integer.parseInt(attributes.getValue(1));
89- tempBigram.setWord2(counter, word2, freq);
90- counter++;
91- Bigram.sBigramNum++;
92- }
93- }
94-
95- @Override
96- public void endElement(String uri, String localName,
97- String qName) {
98- if (inWord2) {
99- inWord2 = false;
100- } else if (inWord1) {
101- inWord1 = false;
102- mBi.put(w1, tempBigram);
103- }
104- }
105- });
106- } catch (Exception ioe) {
107- System.err.println("Exception in parsing bigram\n" + ioe);
108- ioe.printStackTrace();
109- }
110- System.out.println("Number of bigrams = " + Bigram.sBigramNum);
111- }
112-
113- byte[] writeBigrams(byte[] dict, Map<String, Integer> mDictionary) {
114- for (int i = 0; i < mBigramToFill.size(); i++) {
115- String w1 = mBigramToFill.get(i);
116- int address = mBigramToFillAddress.get(i);
117-
118- Bigram temp = mBi.get(w1);
119- int word2Count = temp.count;
120- int j4;
121- for (int j = 0; j < word2Count; j++) {
122- if (!mDictionary.containsKey(temp.word2[j])) {
123- System.out.println("Not in dictionary: " + temp.word2[j]);
124- System.exit(0);
125- } else {
126- j4 = (j * 4);
127- int addressOfWord2 = mDictionary.get(temp.word2[j]);
128- dict[address + j4 + 0] = (byte) (((addressOfWord2 & 0x3F0000) >> 16)
129- | FLAG_BIGRAM_READ);
130- dict[address + j4 + 1] = (byte) ((addressOfWord2 & 0x00FF00) >> 8);
131- dict[address + j4 + 2] = (byte) ((addressOfWord2 & 0x0000FF));
132-
133- if (j == (word2Count - 1)) {
134- dict[address + j4 + 3] = (byte) (temp.freq[j] & FLAG_BIGRAM_FREQ);
135- } else {
136- dict[address + j4 + 3] = (byte) ((temp.freq[j] & FLAG_BIGRAM_FREQ)
137- | FLAG_BIGRAM_CONTINUED);
138- }
139- }
140- }
141- }
142-
143- return dict;
144- }
145-
146- void reverseLookupAll(Map<String, Integer> mDictionary, byte[] dict) {
147- Set<String> st = mDictionary.keySet();
148- for (String s : st) {
149- searchForTerminalNode(mDictionary.get(s), FOR_REVERSE_LOOKUPALL, dict);
150- }
151- }
152-
153- void searchForTerminalNode(int bigramAddress, int frequency, byte[] dict) {
154- StringBuilder sb = new StringBuilder(48);
155- int pos;
156- boolean found = false;
157- int followDownBranchAddress = 2;
158- char followingChar = ' ';
159- int depth = 0;
160- int totalLoopCount = 0;
161-
162- while (!found) {
163- boolean followDownAddressSearchStop = false;
164- boolean firstAddress = true;
165- boolean haveToSearchAll = true;
166-
167- if (depth > 0) {
168- sb.append(followingChar);
169- }
170- pos = followDownBranchAddress; // pos start at count
171- int count = dict[pos] & 0xFF;
172- pos++;
173- for (int i = 0; i < count; i++) {
174- totalLoopCount++;
175- // pos at data
176- pos++;
177- // pos now at flag
178- if (!MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // non-terminal
179- if (!followDownAddressSearchStop) {
180- int addr = MakeBinaryDictionary.get22BitAddress(pos, dict);
181- if (addr > bigramAddress) {
182- followDownAddressSearchStop = true;
183- if (firstAddress) {
184- firstAddress = false;
185- haveToSearchAll = true;
186- } else if (!haveToSearchAll) {
187- break;
188- }
189- } else {
190- followDownBranchAddress = addr;
191- followingChar = (char) (0xFF & dict[pos-1]);
192- if(firstAddress) {
193- firstAddress = false;
194- haveToSearchAll = false;
195- }
196- }
197- }
198- pos += 3;
199- } else if (MakeBinaryDictionary.getFirstBitOfByte(pos, dict)) { // terminal
200- // found !!
201- if (bigramAddress == (pos-1)) {
202- sb.append((char) (0xFF & dict[pos-1]));
203- found = true;
204- break;
205- }
206-
207- // address + freq (4 byte)
208- if (MakeBinaryDictionary.getSecondBitOfByte(pos, dict)) {
209- if (!followDownAddressSearchStop) {
210- int addr = MakeBinaryDictionary.get22BitAddress(pos, dict);
211- if (addr > bigramAddress) {
212- followDownAddressSearchStop = true;
213- if (firstAddress) {
214- firstAddress = false;
215- haveToSearchAll = true;
216- } else if (!haveToSearchAll) {
217- break;
218- }
219- } else {
220- followDownBranchAddress = addr;
221- followingChar = (char) (0xFF & dict[pos-1]);
222- if(firstAddress) {
223- firstAddress = false;
224- haveToSearchAll = true;
225- }
226- }
227- }
228- pos += 4;
229- } else { // freq only (2 byte)
230- pos += 2;
231- }
232- // skipping bigram
233- int bigramExist = (dict[pos] & FLAG_BIGRAM_READ);
234- if (bigramExist > 0) {
235- int nextBigramExist = 1;
236- while (nextBigramExist > 0) {
237- pos += 3;
238- nextBigramExist = (dict[pos++] & FLAG_BIGRAM_CONTINUED);
239- }
240- } else {
241- pos++;
242- }
243- }
244- }
245- depth++;
246- if (followDownBranchAddress == 2) {
247- System.out.println("ERROR!!! Cannot find bigram!!");
248- System.exit(0);
249- }
250- }
251-
252- if (frequency == FOR_REVERSE_LOOKUPALL) {
253- System.out.println("Reverse: " + sb.toString() + " (" + bigramAddress + ")"
254- + " Loop: " + totalLoopCount);
255- } else {
256- System.out.println(" bigram: " + sb.toString() + " (" + bigramAddress + ") freq: "
257- + frequency + " Loop: " + totalLoopCount);
258- }
259- }
260-
261- static class Bigram {
262- String[] word2;
263- int[] freq;
264- int count;
265- static int sBigramNum = 0;
266-
267- String getSecondWord(int i) {
268- return word2[i];
269- }
270-
271- int getFrequency(int i) {
272- return (freq[i] == 0) ? 1 : freq[i];
273- }
274-
275- void setWord2(int index, String word2, int freq) {
276- this.word2[index] = word2;
277- this.freq[index] = freq;
278- }
279-
280- public Bigram(int word2Count) {
281- count = word2Count;
282- word2 = new String[word2Count];
283- freq = new int[word2Count];
284- }
285- }
286-}
--- a/tools/makedict/src/com/android/tools/dict/MakeBinaryDictionary.java
+++ /dev/null
@@ -1,443 +0,0 @@
1-/*
2- * Copyright (C) 2009 The Android Open Source Project
3- *
4- * Licensed under the Apache License, Version 2.0 (the "License");
5- * you may not use this file except in compliance with the License.
6- * You may obtain a copy of the License at
7- *
8- * http://www.apache.org/licenses/LICENSE-2.0
9- *
10- * Unless required by applicable law or agreed to in writing, software
11- * distributed under the License is distributed on an "AS IS" BASIS,
12- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13- * See the License for the specific language governing permissions and
14- * limitations under the License.
15- */
16-
17-package com.android.tools.dict;
18-
19-import org.xml.sax.Attributes;
20-import org.xml.sax.helpers.DefaultHandler;
21-
22-import java.io.File;
23-import java.io.FileOutputStream;
24-import java.io.IOException;
25-import java.util.ArrayList;
26-import java.util.Arrays;
27-import java.util.HashMap;
28-import java.util.List;
29-import java.util.Map;
30-
31-import javax.xml.parsers.SAXParser;
32-import javax.xml.parsers.SAXParserFactory;
33-
34-/**
35- * Compresses a list of words, frequencies, and bigram data
36- * into a tree structured binary dictionary.
37- * Dictionary Version: 200 (may contain bigrams)
38- * Version number started from 200 rather than 1 because we wanted to prevent number of roots in
39- * any old dictionaries being mistaken as the version number. There is not a chance that there
40- * will be more than 200 roots. Version number should be increased when there is structural change
41- * in the data. There is no need to increase the version when only the words in the data changes.
42- */
43-public class MakeBinaryDictionary {
44-
45- private static final int VERSION_NUM = 200;
46-
47- public static final int ALPHA_SIZE = 256;
48-
49- public static final String TAG_WORD = "w";
50- public static final String ATTR_FREQ = "f";
51-
52- private static final int FLAG_ADDRESS_MASK = 0x400000;
53- private static final int FLAG_TERMINAL_MASK = 0x800000;
54- private static final int ADDRESS_MASK = 0x3FFFFF;
55-
56- /**
57- * Unit for this variable is in bytes
58- * If destination file name is main.dict and file limit causes dictionary to be separated into
59- * multiple file, it will generate main0.dict, main1.dict, and so forth.
60- */
61- private static int sOutputFileSize;
62- private static boolean sSplitOutput;
63-
64- public static final CharNode EMPTY_NODE = new CharNode();
65-
66- List<CharNode> roots;
67- Map<String, Integer> mDictionary;
68- int mWordCount;
69-
70- BigramDictionary bigramDict;
71-
72- static class CharNode {
73- char data;
74- int freq;
75- boolean terminal;
76- List<CharNode> children;
77- static int sNodes;
78-
79- public CharNode() {
80- sNodes++;
81- }
82- }
83-
84- public static void usage() {
85- System.err.println("Usage: makedict -s <src_dict.xml> [-b <src_bigram.xml>] "
86- + "-d <dest.dict> [--size filesize]");
87- System.exit(-1);
88- }
89-
90- public static void main(String[] args) {
91- int checkSource = -1;
92- int checkBigram = -1;
93- int checkDest = -1;
94- int checkFileSize = -1;
95- for (int i = 0; i < args.length; i+=2) {
96- if (args[i].equals("-s")) checkSource = (i + 1);
97- if (args[i].equals("-b")) checkBigram = (i + 1);
98- if (args[i].equals("-d")) checkDest = (i + 1);
99- if (args[i].equals("--size")) checkFileSize = (i + 1);
100- }
101- if (checkFileSize >= 0) {
102- sSplitOutput = true;
103- sOutputFileSize = Integer.parseInt(args[checkFileSize]);
104- } else {
105- sSplitOutput = false;
106- }
107- if (checkDest >= 0 && !args[checkDest].endsWith(".dict")) {
108- System.err.println("Error: Dictionary output file extension should be \".dict\"");
109- usage();
110- } else if (checkSource >= 0 && checkBigram >= 0 && checkDest >= 0 &&
111- ((!sSplitOutput && args.length == 6) || (sSplitOutput && args.length == 8))) {
112- new MakeBinaryDictionary(args[checkSource], args[checkBigram], args[checkDest]);
113- } else if (checkSource >= 0 && checkDest >= 0 &&
114- ((!sSplitOutput && args.length == 4) || (sSplitOutput && args.length == 6))) {
115- new MakeBinaryDictionary(args[checkSource], null, args[checkDest]);
116- } else {
117- usage();
118- }
119- }
120-
121- public MakeBinaryDictionary(String srcFilename, String bigramSrcFilename, String destFilename){
122- System.out.println("Generating dictionary version " + VERSION_NUM);
123- bigramDict = new BigramDictionary(bigramSrcFilename, (bigramSrcFilename != null));
124- populateDictionary(srcFilename);
125- writeToDict(destFilename);
126-
127- // Enable the code below to verify that the generated tree is traversable
128- // and bigram data is stored correctly.
129- if (false) {
130- bigramDict.reverseLookupAll(mDictionary, dict);
131- traverseDict(2, new char[32], 0);
132- }
133- }
134-
135- private void populateDictionary(String filename) {
136- roots = new ArrayList<CharNode>();
137- mDictionary = new HashMap<String, Integer>();
138- try {
139- SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
140- parser.parse(new File(filename), new DefaultHandler() {
141- boolean inWord;
142- int freq;
143- StringBuilder wordBuilder = new StringBuilder(48);
144-
145- @Override
146- public void startElement(String uri, String localName,
147- String qName, Attributes attributes) {
148- if (qName.equals("w")) {
149- inWord = true;
150- freq = Integer.parseInt(attributes.getValue(0));
151- wordBuilder.setLength(0);
152- }
153- }
154-
155- @Override
156- public void characters(char[] data, int offset, int length) {
157- // Ignore other whitespace
158- if (!inWord) return;
159- wordBuilder.append(data, offset, length);
160- }
161-
162- @Override
163- public void endElement(String uri, String localName,
164- String qName) {
165- if (qName.equals("w")) {
166- if (wordBuilder.length() > 1) {
167- addWordTop(wordBuilder.toString(), freq);
168- mWordCount++;
169- }
170- inWord = false;
171- }
172- }
173- });
174- } catch (Exception ioe) {
175- System.err.println("Exception in parsing\n" + ioe);
176- ioe.printStackTrace();
177- }
178- System.out.println("Nodes = " + CharNode.sNodes);
179- }
180-
181- private int indexOf(List<CharNode> children, char c) {
182- if (children == null) {
183- return -1;
184- }
185- for (int i = 0; i < children.size(); i++) {
186- if (children.get(i).data == c) {
187- return i;
188- }
189- }
190- return -1;
191- }
192-
193- private void addWordTop(String word, int occur) {
194- if (occur > 255) occur = 255;
195- char firstChar = word.charAt(0);
196- int index = indexOf(roots, firstChar);
197- if (index == -1) {
198- CharNode newNode = new CharNode();
199- newNode.data = firstChar;
200- newNode.freq = occur;
201- index = roots.size();
202- roots.add(newNode);
203- } else {
204- roots.get(index).freq += occur;
205- }
206- if (word.length() > 1) {
207- addWordRec(roots.get(index), word, 1, occur);
208- } else {
209- roots.get(index).terminal = true;
210- }
211- }
212-
213- private void addWordRec(CharNode parent, String word, int charAt, int occur) {
214- CharNode child = null;
215- char data = word.charAt(charAt);
216- if (parent.children == null) {
217- parent.children = new ArrayList<CharNode>();
218- } else {
219- for (int i = 0; i < parent.children.size(); i++) {
220- CharNode node = parent.children.get(i);
221- if (node.data == data) {
222- child = node;
223- break;
224- }
225- }
226- }
227- if (child == null) {
228- child = new CharNode();
229- parent.children.add(child);
230- }
231- child.data = data;
232- if (child.freq == 0) child.freq = occur;
233- if (word.length() > charAt + 1) {
234- addWordRec(child, word, charAt + 1, occur);
235- } else {
236- child.terminal = true;
237- child.freq = occur;
238- }
239- }
240-
241- byte[] dict;
242- int dictSize;
243- static final int CHAR_WIDTH = 8;
244- static final int FLAGS_WIDTH = 1; // Terminal flag (word end)
245- static final int ADDR_WIDTH = 23; // Offset to children
246- static final int FREQ_WIDTH_BYTES = 1;
247- static final int COUNT_WIDTH_BYTES = 1;
248-
249- private void addCount(int count) {
250- dict[dictSize++] = (byte) (0xFF & count);
251- }
252-
253- private void addNode(CharNode node, String word1) {
254- if (node.terminal) { // store address of each word1
255- mDictionary.put(word1, dictSize);
256- }
257- int charData = 0xFFFF & node.data;
258- if (charData > 254) {
259- dict[dictSize++] = (byte) 255;
260- dict[dictSize++] = (byte) ((node.data >> 8) & 0xFF);
261- dict[dictSize++] = (byte) (node.data & 0xFF);
262- } else {
263- dict[dictSize++] = (byte) (0xFF & node.data);
264- }
265- if (node.children != null) {
266- dictSize += 3; // Space for children address
267- } else {
268- dictSize += 1; // Space for just the terminal/address flags
269- }
270- if ((0xFFFFFF & node.freq) > 255) {
271- node.freq = 255;
272- }
273- if (node.terminal) {
274- byte freq = (byte) (0xFF & node.freq);
275- dict[dictSize++] = freq;
276- // bigram
277- if (bigramDict.mBi.containsKey(word1)) {
278- int count = bigramDict.mBi.get(word1).count;
279- bigramDict.mBigramToFill.add(word1);
280- bigramDict.mBigramToFillAddress.add(dictSize);
281- dictSize += (4 * count);
282- } else {
283- dict[dictSize++] = (byte) (0x00);
284- }
285- }
286- }
287-
288- int nullChildrenCount = 0;
289- int notTerminalCount = 0;
290-
291- private void updateNodeAddress(int nodeAddress, CharNode node,
292- int childrenAddress) {
293- if ((dict[nodeAddress] & 0xFF) == 0xFF) { // 3 byte character
294- nodeAddress += 2;
295- }
296- childrenAddress = ADDRESS_MASK & childrenAddress;
297- if (childrenAddress == 0) {
298- nullChildrenCount++;
299- } else {
300- childrenAddress |= FLAG_ADDRESS_MASK;
301- }
302- if (node.terminal) {
303- childrenAddress |= FLAG_TERMINAL_MASK;
304- } else {
305- notTerminalCount++;
306- }
307- dict[nodeAddress + 1] = (byte) (childrenAddress >> 16);
308- if ((childrenAddress & FLAG_ADDRESS_MASK) != 0) {
309- dict[nodeAddress + 2] = (byte) ((childrenAddress & 0xFF00) >> 8);
310- dict[nodeAddress + 3] = (byte) ((childrenAddress & 0xFF));
311- }
312- }
313-
314- void writeWordsRec(List<CharNode> children, StringBuilder word) {
315- if (children == null || children.size() == 0) {
316- return;
317- }
318- final int childCount = children.size();
319- addCount(childCount);
320- int[] childrenAddresses = new int[childCount];
321- for (int j = 0; j < childCount; j++) {
322- CharNode node = children.get(j);
323- childrenAddresses[j] = dictSize;
324- word.append(children.get(j).data);
325- addNode(node, word.toString());
326- word.deleteCharAt(word.length()-1);
327- }
328- for (int j = 0; j < childCount; j++) {
329- CharNode node = children.get(j);
330- int nodeAddress = childrenAddresses[j];
331- int cacheDictSize = dictSize;
332- word.append(children.get(j).data);
333- writeWordsRec(node.children, word);
334- word.deleteCharAt(word.length()-1);
335- updateNodeAddress(nodeAddress, node, node.children != null
336- ? cacheDictSize : 0);
337- }
338- }
339-
340- void writeToDict(String dictFilename) {
341- // 4MB max, 22-bit offsets
342- dict = new byte[4 * 1024 * 1024]; // 4MB upper limit. Actual is probably
343- // < 1MB in most cases, as there is a limit in the
344- // resource size in apks.
345- dictSize = 0;
346-
347- dict[dictSize++] = (byte) (0xFF & VERSION_NUM); // version info
348- dict[dictSize++] = (byte) (0xFF & (bigramDict.mHasBigram ? 1 : 0));
349-
350- StringBuilder word = new StringBuilder(48);
351- writeWordsRec(roots, word);
352- dict = bigramDict.writeBigrams(dict, mDictionary);
353- System.out.println("Dict Size = " + dictSize);
354- if (!sSplitOutput) {
355- sOutputFileSize = dictSize;
356- }
357- try {
358- int currentLoc = 0;
359- int i = 0;
360- int extension = dictFilename.indexOf(".dict");
361- String filename = dictFilename.substring(0, extension);
362- while (dictSize > 0) {
363- FileOutputStream fos;
364- if (sSplitOutput) {
365- fos = new FileOutputStream(filename + i + ".dict");
366- } else {
367- fos = new FileOutputStream(filename + ".dict");
368- }
369- if (dictSize > sOutputFileSize) {
370- fos.write(dict, currentLoc, sOutputFileSize);
371- dictSize -= sOutputFileSize;
372- currentLoc += sOutputFileSize;
373- } else {
374- fos.write(dict, currentLoc, dictSize);
375- dictSize = 0;
376- }
377- fos.close();
378- i++;
379- }
380- } catch (IOException ioe) {
381- System.err.println("Error writing dict file:" + ioe);
382- }
383- }
384-
385- void traverseDict(int pos, char[] word, int depth) {
386- int count = dict[pos++] & 0xFF;
387- for (int i = 0; i < count; i++) {
388- char c = (char) (dict[pos++] & 0xFF);
389- if (c == 0xFF) { // two byte character
390- c = (char) (((dict[pos] & 0xFF) << 8) | (dict[pos+1] & 0xFF));
391- pos += 2;
392- }
393- word[depth] = c;
394- boolean terminal = getFirstBitOfByte(pos, dict);
395- int address = 0;
396- if ((dict[pos] & (FLAG_ADDRESS_MASK >> 16)) > 0) { // address check
397- address = get22BitAddress(pos, dict);
398- pos += 3;
399- } else {
400- pos += 1;
401- }
402- if (terminal) {
403- showWord(word, depth + 1, dict[pos] & 0xFF);
404- pos++;
405-
406- int bigramExist = (dict[pos] & bigramDict.FLAG_BIGRAM_READ);
407- if (bigramExist > 0) {
408- int nextBigramExist = 1;
409- while (nextBigramExist > 0) {
410- int bigramAddress = get22BitAddress(pos, dict);
411- pos += 3;
412- int frequency = (bigramDict.FLAG_BIGRAM_FREQ & dict[pos]);
413- bigramDict.searchForTerminalNode(bigramAddress, frequency, dict);
414- nextBigramExist = (dict[pos++] & bigramDict.FLAG_BIGRAM_CONTINUED);
415- }
416- } else {
417- pos++;
418- }
419- }
420- if (address != 0) {
421- traverseDict(address, word, depth + 1);
422- }
423- }
424- }
425-
426- void showWord(char[] word, int size, int freq) {
427- System.out.print(new String(word, 0, size) + " " + freq + "\n");
428- }
429-
430- static int get22BitAddress(int pos, byte[] dict) {
431- return ((dict[pos + 0] & 0x3F) << 16)
432- | ((dict[pos + 1] & 0xFF) << 8)
433- | ((dict[pos + 2] & 0xFF));
434- }
435-
436- static boolean getFirstBitOfByte(int pos, byte[] dict) {
437- return (dict[pos] & 0x80) > 0;
438- }
439-
440- static boolean getSecondBitOfByte(int pos, byte[] dict) {
441- return (dict[pos] & 0x40) > 0;
442- }
443-}