Revision | 86e8a0ecc6943322303429f842944341537829dd (tree) |
---|---|
Zeit | 2015-03-23 18:45:31 |
Autor | komutan <t_komuta@nift...> |
Committer | komutan |
.NET FW が対応していない文字コード名に対応
@@ -1,289 +1,289 @@ | ||
1 | -// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer | |
2 | -// | |
3 | -// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org> | |
4 | -// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation | |
5 | -using System; | |
6 | -using System.Collections.Generic; | |
7 | -using System.Text; | |
8 | -using System.IO; | |
9 | -#if MMF_DIC | |
10 | -using System.IO.MemoryMappedFiles; | |
11 | -#endif | |
12 | - | |
13 | -namespace NMeCab.Core | |
14 | -{ | |
15 | - public class MeCabDictionary : IDisposable | |
16 | - { | |
17 | - #region Const/Field/Property | |
18 | - | |
19 | - private const uint DictionaryMagicID = 0xEF718F77u; | |
20 | - private const uint DicVersion = 102u; | |
21 | - | |
22 | -#if MMF_DIC | |
23 | - private MemoryMappedFile mmf; | |
24 | - private MemoryMappedViewAccessor tokens; | |
25 | - private MemoryMappedViewAccessor features; | |
26 | -#else | |
27 | - private Token[] tokens; | |
28 | - private byte[] features; | |
29 | -#endif | |
30 | - | |
31 | - private DoubleArray da = new DoubleArray(); | |
32 | - | |
33 | - private Encoding encoding; | |
34 | - | |
35 | - /// <summary> | |
36 | - /// 辞書の文字コード | |
37 | - /// </summary> | |
38 | - public string CharSet | |
39 | - { | |
40 | - get { return this.encoding.WebName; } | |
41 | - } | |
42 | - | |
43 | - /// <summary> | |
44 | - /// バージョン | |
45 | - /// </summary> | |
46 | - public uint Version { get; private set; } | |
47 | - | |
48 | - /// <summary> | |
49 | - /// 辞書のタイプ | |
50 | - /// </summary> | |
51 | - public DictionaryType Type { get; private set; } | |
52 | - | |
53 | - public uint LexSize { get; private set; } | |
54 | - | |
55 | - /// <summary> | |
56 | - /// 左文脈 ID のサイズ | |
57 | - /// </summary> | |
58 | - public uint LSize { get; private set; } | |
59 | - | |
60 | - /// <summary> | |
61 | - /// 右文脈 ID のサイズ | |
62 | - /// </summary> | |
63 | - public uint RSize { get; private set; } | |
64 | - | |
65 | - /// <summary> | |
66 | - /// 辞書のファイル名 | |
67 | - /// </summary> | |
68 | - public string FileName { get; private set; } | |
69 | - | |
70 | - #endregion | |
71 | - | |
72 | - #region Open | |
73 | - | |
74 | -#if MMF_DIC | |
75 | - | |
76 | - public void Open(string filePath) | |
77 | - { | |
78 | - this.mmf = MemoryMappedFile.CreateFromFile(filePath, FileMode.Open, | |
79 | - null, 0L, MemoryMappedFileAccess.Read); | |
80 | - this.Open(this.mmf, filePath); | |
81 | - } | |
82 | - | |
83 | - public void Open(MemoryMappedFile mmf, string filePath = null) | |
84 | - { | |
85 | - this.FileName = filePath; | |
86 | - | |
87 | - using (MemoryMappedViewStream stream = mmf.CreateViewStream( | |
88 | - 0L, 0L, MemoryMappedFileAccess.Read)) | |
89 | - using (BinaryReader reader = new BinaryReader(stream)) | |
90 | - { | |
91 | - uint magic = reader.ReadUInt32(); | |
92 | - if (stream.CanSeek && stream.Length < (magic ^ DictionaryMagicID)) //正確なサイズ取得ができないので不等号で代用 | |
93 | - throw new MeCabInvalidFileException("dictionary file is broken", filePath); | |
94 | - | |
95 | - this.Version = reader.ReadUInt32(); | |
96 | - if (this.Version != DicVersion) | |
97 | - throw new MeCabInvalidFileException("incompatible version", filePath); | |
98 | - | |
99 | - this.Type = (DictionaryType)reader.ReadUInt32(); | |
100 | - this.LexSize = reader.ReadUInt32(); | |
101 | - this.LSize = reader.ReadUInt32(); | |
102 | - this.RSize = reader.ReadUInt32(); | |
103 | - uint dSize = reader.ReadUInt32(); | |
104 | - uint tSize = reader.ReadUInt32(); | |
105 | - uint fSize = reader.ReadUInt32(); | |
106 | - reader.ReadUInt32(); //dummy | |
107 | - | |
108 | - string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII); | |
109 | - this.encoding = Encoding.GetEncoding(charSet); | |
110 | - | |
111 | - long offset = stream.Position; | |
112 | - this.da.Open(mmf, offset, dSize); | |
113 | - offset += dSize; | |
114 | - this.tokens = mmf.CreateViewAccessor(offset, tSize, MemoryMappedFileAccess.Read); | |
115 | - offset += tSize; | |
116 | - this.features = mmf.CreateViewAccessor(offset, fSize, MemoryMappedFileAccess.Read); | |
117 | - } | |
118 | - } | |
119 | - | |
120 | -#else | |
121 | - | |
122 | - public void Open(string filePath) | |
123 | - { | |
124 | - this.FileName = filePath; | |
125 | - | |
126 | - using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read)) | |
127 | - using (BinaryReader reader = new BinaryReader(fileStream)) | |
128 | - { | |
129 | - this.Open(reader); | |
130 | - } | |
131 | - } | |
132 | - | |
133 | - public unsafe void Open(BinaryReader reader) | |
134 | - { | |
135 | - uint magic = reader.ReadUInt32(); | |
136 | - //CanSeekの時のみストリーム長のチェック | |
137 | - if (reader.BaseStream.CanSeek && reader.BaseStream.Length != (magic ^ DictionaryMagicID)) | |
138 | - throw new MeCabInvalidFileException("dictionary file is broken", this.FileName); | |
139 | - | |
140 | - this.Version = reader.ReadUInt32(); | |
141 | - if (this.Version != DicVersion) | |
142 | - throw new MeCabInvalidFileException("incompatible version", this.FileName); | |
143 | - | |
144 | - this.Type = (DictionaryType)reader.ReadUInt32(); | |
145 | - this.LexSize = reader.ReadUInt32(); | |
146 | - this.LSize = reader.ReadUInt32(); | |
147 | - this.RSize = reader.ReadUInt32(); | |
148 | - uint dSize = reader.ReadUInt32(); | |
149 | - uint tSize = reader.ReadUInt32(); | |
150 | - uint fSize = reader.ReadUInt32(); | |
151 | - reader.ReadUInt32(); //dummy | |
152 | - | |
153 | - string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII); | |
154 | - this.encoding = Encoding.GetEncoding(charSet); | |
155 | - | |
156 | - this.da.Open(reader, dSize); | |
157 | - | |
158 | - this.tokens = new Token[tSize / sizeof(Token)]; | |
159 | - for (int i = 0; i < this.tokens.Length; i++) | |
160 | - this.tokens[i] = Token.Create(reader); | |
161 | - | |
162 | - this.features = reader.ReadBytes((int)fSize); | |
163 | - | |
164 | - if (reader.BaseStream.ReadByte() != -1) | |
165 | - throw new MeCabInvalidFileException("dictionary file is broken", this.FileName); | |
166 | - } | |
167 | - | |
168 | -#endif | |
169 | - | |
170 | - #endregion | |
171 | - | |
172 | - #region Search | |
173 | - | |
174 | - public unsafe DoubleArray.ResultPair ExactMatchSearch(string key) | |
175 | - { | |
176 | - fixed (char* pKey = key) | |
177 | - return this.ExactMatchSearch(pKey, key.Length, 0); | |
178 | - } | |
179 | - | |
180 | - public unsafe DoubleArray.ResultPair ExactMatchSearch(char* key, int len, int nodePos = 0) | |
181 | - { | |
182 | - //if (this.encoding == Encoding.Unicode) | |
183 | - // return this.da.ExactMatchSearch((byte*)key, len, nodePos); | |
184 | - | |
185 | - //エンコード | |
186 | - int maxByteCount = this.encoding.GetMaxByteCount(len); | |
187 | - byte* bytes = stackalloc byte[maxByteCount]; | |
188 | - int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteCount); | |
189 | - | |
190 | - DoubleArray.ResultPair result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos); | |
191 | - | |
192 | - //文字数をデコードしたものに変換 | |
193 | - result.Length = this.encoding.GetCharCount(bytes, result.Length); | |
194 | - | |
195 | - return result; | |
196 | - } | |
197 | - | |
198 | - public unsafe int CommonPrefixSearch(char* key, int len, DoubleArray.ResultPair* result, int rLen) | |
199 | - { | |
200 | - //if (this.encoding == Encoding.Unicode) | |
201 | - // return this.da.CommonPrefixSearch((byte*)key, result, rLen, len); | |
202 | - | |
203 | - //エンコード | |
204 | - int maxByteLen = this.encoding.GetMaxByteCount(len); | |
205 | - byte* bytes = stackalloc byte[maxByteLen]; | |
206 | - int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteLen); | |
207 | - | |
208 | - int n = this.da.CommonPrefixSearch(bytes, result, rLen, bytesLen); | |
209 | - | |
210 | - //文字数をデコードしたものに変換 | |
211 | - for (int i = 0; i < n; i++) | |
212 | - result[i].Length = this.encoding.GetCharCount(bytes, result[i].Length); | |
213 | - | |
214 | - return n; | |
215 | - } | |
216 | - | |
217 | - #endregion | |
218 | - | |
219 | - #region Get Infomation | |
220 | - | |
221 | - public unsafe Token[] GetToken(DoubleArray.ResultPair n) | |
222 | - { | |
223 | - Token[] dist = new Token[0xFF & n.Value]; | |
224 | - int tokenPos = n.Value >> 8; | |
225 | -#if MMF_DIC | |
226 | - this.tokens.ReadArray<Token>(tokenPos * sizeof(Token), dist, 0, dist.Length); | |
227 | -#else | |
228 | - Array.Copy(this.tokens, tokenPos, dist, 0, dist.Length); | |
229 | -#endif | |
230 | - return dist; | |
231 | - } | |
232 | - | |
233 | - public string GetFeature(uint featurePos) | |
234 | - { | |
235 | - return StrUtils.GetString(this.features, (long)featurePos, this.encoding); | |
236 | - } | |
237 | - | |
238 | - #endregion | |
239 | - | |
240 | - #region etc. | |
241 | - | |
242 | - public bool IsCompatible(MeCabDictionary d) | |
243 | - { | |
244 | - return (this.Version == d.Version && | |
245 | - this.LSize == d.LSize && | |
246 | - this.RSize == d.RSize && | |
247 | - this.CharSet == d.CharSet); | |
248 | - } | |
249 | - | |
250 | - #endregion | |
251 | - | |
252 | - #region Dispose | |
253 | - | |
254 | - private bool disposed; | |
255 | - | |
256 | - /// <summary> | |
257 | - /// 使用されているリソースを開放する | |
258 | - /// </summary> | |
259 | - public void Dispose() | |
260 | - { | |
261 | - this.Dispose(true); | |
262 | - GC.SuppressFinalize(this); | |
263 | - } | |
264 | - | |
265 | - protected virtual void Dispose(bool disposing) | |
266 | - { | |
267 | - if (disposed) return; | |
268 | - | |
269 | - if (disposing) | |
270 | - { | |
271 | - if (this.da != null) this.da.Dispose(); | |
272 | -#if MMF_DIC | |
273 | - if (this.mmf != null) this.mmf.Dispose(); | |
274 | - if (this.tokens != null) this.tokens.Dispose(); | |
275 | - if (this.features != null) this.features.Dispose(); | |
276 | -#endif | |
277 | - } | |
278 | - | |
279 | - this.disposed = true; | |
280 | - } | |
281 | - | |
282 | - ~MeCabDictionary() | |
283 | - { | |
284 | - this.Dispose(false); | |
285 | - } | |
286 | - | |
287 | - #endregion | |
288 | - } | |
289 | -} | |
//  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//  Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
//  Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
#if MMF_DIC
using System.IO.MemoryMappedFiles;
#endif

namespace NMeCab.Core
{
    /// <summary>
    /// Reader for one compiled MeCab dictionary file. Parses the file header, then loads
    /// (or, when MMF_DIC is defined, memory-maps) the double-array index, the token table and
    /// the feature strings, and offers key lookup on top of them.
    /// </summary>
    public class MeCabDictionary : IDisposable
    {
        #region Const/Field/Property

        // XOR-ing the first header word with this value yields the expected file size in bytes.
        private const uint DictionaryMagicID = 0xEF718F77u;

        // The only dictionary format version accepted by Open.
        private const uint DicVersion = 102u;

        // Width in bytes of the header field that stores the NUL-terminated character set name.
        private const int CharSetFieldSize = 32;

#if MMF_DIC
        // Set only by Open(string); a mapping handed to Open(MemoryMappedFile, string) is not stored.
        private MemoryMappedFile mmf;
        private MemoryMappedViewAccessor tokens;
        private MemoryMappedViewAccessor features;
#else
        private Token[] tokens;
        private byte[] features;
#endif

        private DoubleArray da = new DoubleArray();

        private Encoding encoding;

        /// <summary>
        /// Character encoding of the dictionary (the web name of the resolved encoding).
        /// </summary>
        public string CharSet
        {
            get { return this.encoding.WebName; }
        }

        /// <summary>
        /// Dictionary format version read from the header.
        /// </summary>
        public uint Version { get; private set; }

        /// <summary>
        /// Dictionary type read from the header.
        /// </summary>
        public DictionaryType Type { get; private set; }

        /// <summary>
        /// Lexicon size field read from the header (presumably the number of entries - TODO confirm).
        /// </summary>
        public uint LexSize { get; private set; }

        /// <summary>
        /// Size of the left context ID space.
        /// </summary>
        public uint LSize { get; private set; }

        /// <summary>
        /// Size of the right context ID space.
        /// </summary>
        public uint RSize { get; private set; }

        /// <summary>
        /// File name of the dictionary (null when it was opened without a path).
        /// </summary>
        public string FileName { get; private set; }

        #endregion

        #region Open

        /// <summary>
        /// Reads the fixed-width character set field from the header and returns the name
        /// stored in it, i.e. the ASCII bytes before the first NUL.
        /// </summary>
        /// <remarks>
        /// The scan is limited to the bytes actually read, so a field without a terminator
        /// (or a short read at end of stream) can never be read past its end.
        /// </remarks>
        private static string ReadCharSetName(BinaryReader reader)
        {
            byte[] raw = reader.ReadBytes(CharSetFieldSize);

            int length = Array.IndexOf(raw, (byte)0);
            if (length < 0)
                length = raw.Length;

            return Encoding.ASCII.GetString(raw, 0, length);
        }

#if MMF_DIC

        /// <summary>
        /// Memory-maps the dictionary file at <paramref name="filePath"/> read-only and opens it.
        /// The mapping created here is owned by this instance and released by Dispose.
        /// </summary>
        /// <param name="filePath">Path of the dictionary file.</param>
        public void Open(string filePath)
        {
            this.mmf = MemoryMappedFile.CreateFromFile(filePath, FileMode.Open,
                                                       null, 0L, MemoryMappedFileAccess.Read);
            this.Open(this.mmf, filePath);
        }

        /// <summary>
        /// Opens a dictionary from an existing memory-mapped file. The mapping itself is not
        /// stored by this method; only the views created on it are kept.
        /// </summary>
        /// <param name="mmf">Memory-mapped dictionary file.</param>
        /// <param name="filePath">Path reported in <see cref="FileName"/> and in error messages.</param>
        /// <exception cref="ArgumentNullException"><paramref name="mmf"/> is null.</exception>
        /// <exception cref="MeCabInvalidFileException">The header is inconsistent or has an unsupported version.</exception>
        public void Open(MemoryMappedFile mmf, string filePath = null)
        {
            if (mmf == null)
                throw new ArgumentNullException("mmf");

            this.FileName = filePath;

            using (MemoryMappedViewStream stream = mmf.CreateViewStream(
                                                        0L, 0L, MemoryMappedFileAccess.Read))
            using (BinaryReader reader = new BinaryReader(stream))
            {
                uint magic = reader.ReadUInt32();
                // The exact file size cannot be obtained from the view, so an inequality is used instead.
                if (stream.CanSeek && stream.Length < (magic ^ DictionaryMagicID))
                    throw new MeCabInvalidFileException("dictionary file is broken", filePath);

                this.Version = reader.ReadUInt32();
                if (this.Version != DicVersion)
                    throw new MeCabInvalidFileException("incompatible version", filePath);

                this.Type = (DictionaryType)reader.ReadUInt32();
                this.LexSize = reader.ReadUInt32();
                this.LSize = reader.ReadUInt32();
                this.RSize = reader.ReadUInt32();
                uint dSize = reader.ReadUInt32();
                uint tSize = reader.ReadUInt32();
                uint fSize = reader.ReadUInt32();
                reader.ReadUInt32(); // dummy

                this.encoding = StrUtils.GetEncoding(ReadCharSetName(reader));

                // The three data sections follow the header back to back.
                long offset = stream.Position;
                this.da.Open(mmf, offset, dSize);
                offset += dSize;
                this.tokens = mmf.CreateViewAccessor(offset, tSize, MemoryMappedFileAccess.Read);
                offset += tSize;
                this.features = mmf.CreateViewAccessor(offset, fSize, MemoryMappedFileAccess.Read);
            }
        }

#else

        /// <summary>
        /// Opens the dictionary file at <paramref name="filePath"/> and loads it into memory.
        /// </summary>
        /// <param name="filePath">Path of the dictionary file.</param>
        public void Open(string filePath)
        {
            this.FileName = filePath;

            using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
            using (BinaryReader reader = new BinaryReader(fileStream))
            {
                this.Open(reader);
            }
        }

        /// <summary>
        /// Loads a dictionary from <paramref name="reader"/>: header, double-array index,
        /// token table and feature bytes, in that order. The stream must end right after
        /// the feature section.
        /// </summary>
        /// <param name="reader">Reader positioned at the first byte of the dictionary.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        /// <exception cref="MeCabInvalidFileException">
        /// The size check fails, the version is unsupported, the feature section is shorter
        /// than declared, or data remains after the feature section.
        /// </exception>
        public unsafe void Open(BinaryReader reader)
        {
            if (reader == null)
                throw new ArgumentNullException("reader");

            uint magic = reader.ReadUInt32();
            // The stream length can only be checked when the stream is seekable.
            if (reader.BaseStream.CanSeek && reader.BaseStream.Length != (magic ^ DictionaryMagicID))
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);

            this.Version = reader.ReadUInt32();
            if (this.Version != DicVersion)
                throw new MeCabInvalidFileException("incompatible version", this.FileName);

            this.Type = (DictionaryType)reader.ReadUInt32();
            this.LexSize = reader.ReadUInt32();
            this.LSize = reader.ReadUInt32();
            this.RSize = reader.ReadUInt32();
            uint dSize = reader.ReadUInt32();
            uint tSize = reader.ReadUInt32();
            uint fSize = reader.ReadUInt32();
            reader.ReadUInt32(); // dummy

            this.encoding = StrUtils.GetEncoding(ReadCharSetName(reader));

            this.da.Open(reader, dSize);

            this.tokens = new Token[tSize / sizeof(Token)];
            for (int i = 0; i < this.tokens.Length; i++)
                this.tokens[i] = Token.Create(reader);

            this.features = reader.ReadBytes((int)fSize);

            // ReadBytes returns fewer bytes at end of stream instead of failing; a short
            // feature section would later let GetFeature address bytes outside the array.
            if (this.features.Length != fSize)
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);

            // Nothing may follow the feature section.
            if (reader.BaseStream.ReadByte() != -1)
                throw new MeCabInvalidFileException("dictionary file is broken", this.FileName);
        }

#endif

        #endregion

        #region Search

        /// <summary>
        /// Looks up <paramref name="key"/> as an exact match in the double-array index.
        /// </summary>
        /// <param name="key">Key to look up.</param>
        /// <returns>The index result, with Length converted from bytes to characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
        public unsafe DoubleArray.ResultPair ExactMatchSearch(string key)
        {
            if (key == null)
                throw new ArgumentNullException("key");

            fixed (char* pKey = key)
                return this.ExactMatchSearch(pKey, key.Length, 0);
        }

        /// <summary>
        /// Looks up the first <paramref name="len"/> characters at <paramref name="key"/> as an
        /// exact match, after encoding them with the dictionary's character set.
        /// </summary>
        /// <param name="key">Pointer to the first character of the key.</param>
        /// <param name="len">Number of characters in the key.</param>
        /// <param name="nodePos">Start position passed through to the double-array search.</param>
        /// <returns>The index result, with Length converted from bytes to characters.</returns>
        /// <remarks>
        /// The encoded key lives in a stack buffer sized for the worst case of
        /// <paramref name="len"/> characters, so very long keys consume a matching amount of stack.
        /// </remarks>
        public unsafe DoubleArray.ResultPair ExactMatchSearch(char* key, int len, int nodePos = 0)
        {
            // Encode the key into the dictionary's byte representation.
            int maxByteCount = this.encoding.GetMaxByteCount(len);
            byte* bytes = stackalloc byte[maxByteCount];
            int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteCount);

            DoubleArray.ResultPair result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos);

            // The index reports a byte length; callers work in characters.
            result.Length = this.encoding.GetCharCount(bytes, result.Length);

            return result;
        }

        /// <summary>
        /// Finds the dictionary entries that are prefixes of the first <paramref name="len"/>
        /// characters at <paramref name="key"/>.
        /// </summary>
        /// <param name="key">Pointer to the first character of the text.</param>
        /// <param name="len">Number of characters available at <paramref name="key"/>.</param>
        /// <param name="result">Buffer that receives the matches.</param>
        /// <param name="rLen">Capacity of <paramref name="result"/>, passed through to the index.</param>
        /// <returns>The count returned by the double-array search; that many results have their Length converted to characters.</returns>
        /// <remarks>
        /// The encoded text lives in a stack buffer sized for the worst case of
        /// <paramref name="len"/> characters, so very long inputs consume a matching amount of stack.
        /// </remarks>
        public unsafe int CommonPrefixSearch(char* key, int len, DoubleArray.ResultPair* result, int rLen)
        {
            // Encode the text into the dictionary's byte representation.
            int maxByteLen = this.encoding.GetMaxByteCount(len);
            byte* bytes = stackalloc byte[maxByteLen];
            int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteLen);

            int n = this.da.CommonPrefixSearch(bytes, result, rLen, bytesLen);

            // The index reports byte lengths; callers work in characters.
            for (int i = 0; i < n; i++)
                result[i].Length = this.encoding.GetCharCount(bytes, result[i].Length);

            return n;
        }

        #endregion

        #region Get Infomation

        /// <summary>
        /// Returns the tokens referenced by a search result. The low 8 bits of
        /// <c>n.Value</c> give the number of tokens and the remaining upper bits give the
        /// index of the first one in the token table.
        /// </summary>
        /// <param name="n">Search result whose Value encodes the token range.</param>
        /// <returns>A newly allocated array holding copies of the referenced tokens.</returns>
        public unsafe Token[] GetToken(DoubleArray.ResultPair n)
        {
            Token[] dist = new Token[0xFF & n.Value];
            int tokenPos = n.Value >> 8;
#if MMF_DIC
            this.tokens.ReadArray<Token>(tokenPos * sizeof(Token), dist, 0, dist.Length);
#else
            Array.Copy(this.tokens, tokenPos, dist, 0, dist.Length);
#endif
            return dist;
        }

        /// <summary>
        /// Decodes the NUL-terminated feature string that starts at byte
        /// <paramref name="featurePos"/> of the feature section.
        /// </summary>
        /// <param name="featurePos">Byte offset into the feature section.</param>
        /// <returns>The decoded feature string.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="featurePos"/> lies outside the loaded feature bytes (array-backed build).
        /// </exception>
        public string GetFeature(uint featurePos)
        {
#if !MMF_DIC
            // The array overload of StrUtils.GetString works on a raw pointer, so an offset
            // outside the array must be rejected before it is turned into an address.
            if (featurePos >= this.features.LongLength)
                throw new ArgumentOutOfRangeException("featurePos");
#endif
            return StrUtils.GetString(this.features, (long)featurePos, this.encoding);
        }

        #endregion

        #region etc.

        /// <summary>
        /// Tells whether <paramref name="d"/> can be used together with this dictionary:
        /// same version, same left/right context sizes and same character set.
        /// </summary>
        /// <param name="d">Dictionary to compare with.</param>
        /// <exception cref="ArgumentNullException"><paramref name="d"/> is null.</exception>
        public bool IsCompatible(MeCabDictionary d)
        {
            if (d == null)
                throw new ArgumentNullException("d");

            return (this.Version == d.Version &&
                    this.LSize == d.LSize &&
                    this.RSize == d.RSize &&
                    this.CharSet == d.CharSet);
        }

        #endregion

        #region Dispose

        private bool disposed;

        /// <summary>
        /// Releases the resources in use.
        /// </summary>
        public void Dispose()
        {
            this.Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases the double-array index and, in the memory-mapped build, the mapping
        /// created by Open(string) together with the token and feature views.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="Dispose()"/>; false when called from the finalizer.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (disposed) return;

            if (disposing)
            {
                if (this.da != null) this.da.Dispose();
#if MMF_DIC
                if (this.mmf != null) this.mmf.Dispose();
                if (this.tokens != null) this.tokens.Dispose();
                if (this.features != null) this.features.Dispose();
#endif
            }

            this.disposed = true;
        }

        ~MeCabDictionary()
        {
            this.Dispose(false);
        }

        #endregion
    }
}
@@ -1,117 +1,132 @@ | ||
1 | -using System; | |
2 | -using System.Collections.Generic; | |
3 | -using System.Text; | |
4 | -#if MMF_DIC | |
5 | -using System.IO.MemoryMappedFiles; | |
6 | -#endif | |
7 | - | |
8 | -namespace NMeCab.Core | |
9 | -{ | |
10 | - public static class StrUtils | |
11 | - { | |
12 | - private const byte Nul = (byte)0; | |
13 | - | |
14 | - /// <summary> | |
15 | - /// バイト配列の中から終端が\0で表された文字列を取り出す。 | |
16 | - /// </summary> | |
17 | - /// <remarks> | |
18 | - /// バイト配列の長さはInt32.MaxValueを超えていても良い。 | |
19 | - /// </remarks> | |
20 | - /// <param name="bytes">バイト配列</param> | |
21 | - /// <param name="enc">文字エンコーディング</param> | |
22 | - /// <returns>文字列(\0は含まない)</returns> | |
23 | - public static string GetString(byte[] bytes, Encoding enc) | |
24 | - { | |
25 | - return StrUtils.GetString(bytes, 0L, enc); | |
26 | - } | |
27 | - | |
28 | - /// <summary> | |
29 | - /// バイト配列の中から終端が\0で表された文字列を取り出す。 | |
30 | - /// </summary> | |
31 | - /// <remarks> | |
32 | - /// バイト配列の長さはInt32.MaxValueを超えていても良い。 | |
33 | - /// </remarks> | |
34 | - /// <param name="bytes">バイト配列</param> | |
35 | - /// <param name="offset">オフセット位置</param> | |
36 | - /// <param name="enc">文字エンコーディング</param> | |
37 | - /// <returns>文字列(\0は含まない)</returns> | |
38 | - public unsafe static string GetString(byte[] bytes, long offset, Encoding enc) | |
39 | - { | |
40 | - fixed (byte* pBytes = bytes) | |
41 | - return StrUtils.GetString(pBytes + offset, enc); | |
42 | - } | |
43 | - | |
44 | - /// <summary> | |
45 | - /// バイト配列の中から終端が\0で表された文字列を取り出す。 | |
46 | - /// </summary> | |
47 | - /// <remarks> | |
48 | - /// バイト配列の長さはInt32.MaxValueを超えていても良い。 | |
49 | - /// </remarks> | |
50 | - /// <param name="bytes">デコードする最初のバイトへのポインタ</param> | |
51 | - /// <param name="enc">文字エンコーディング</param> | |
52 | - /// <returns>文字列(\0は含まない)</returns> | |
53 | - public unsafe static string GetString(byte* bytes, Encoding enc) | |
54 | - { | |
55 | - //バイト長のカウント | |
56 | - int byteCount = 0; | |
57 | - while (*bytes != Nul) //終端\0に到達するまでシーク | |
58 | - { | |
59 | - checked { byteCount++; } //文字列のバイト長がInt32.MaxValueを超えたならエラー | |
60 | - bytes++; | |
61 | - } | |
62 | - bytes -= byteCount; | |
63 | - | |
64 | - //生成されうる最大文字数のバッファを確保 | |
65 | - int maxCharCount = enc.GetMaxCharCount(byteCount); | |
66 | - fixed (char* buff = new char[maxCharCount]) | |
67 | - { | |
68 | - //バイト配列を文字列にデコード | |
69 | - int len = enc.GetChars(bytes, byteCount, buff, maxCharCount); | |
70 | - return new string(buff, 0, len); | |
71 | - } | |
72 | - } | |
73 | - | |
74 | -#if MMF_DIC | |
75 | - | |
76 | - /// <summary> | |
77 | - /// MemoryMappedViewAccessorから終端が\0で表された文字列を取り出す。 | |
78 | - /// </summary> | |
79 | - /// <remarks> | |
80 | - /// MemoryMappedViewAccessorの容量はInt32.MaxValueを超えていても良い。 | |
81 | - /// </remarks> | |
82 | - /// <param name="accessor">MemoryMappedViewAccessor</param> | |
83 | - /// <param name="index">オフセット位置</param> | |
84 | - /// <param name="enc">文字エンコーディング</param> | |
85 | - /// <param name="buffSize">内部で使用するバッファの初期サイズ</param> | |
86 | - /// <returns>文字列(\0は含まない)</returns> | |
87 | - public static string GetString(MemoryMappedViewAccessor accessor, long offset, Encoding enc, | |
88 | - int buffSize = 128) | |
89 | - { | |
90 | - byte[] buff = new byte[buffSize]; //IO回数削減のためのバッファ配列 | |
91 | - accessor.ReadArray<byte>(offset, buff, 0, buffSize); //初期読込 | |
92 | - | |
93 | - //バイト長のカウント | |
94 | - int byteCount = 0; | |
95 | - while (buff[byteCount] != Nul) //終端\0に到達するまでシーク | |
96 | - { | |
97 | - byteCount++; | |
98 | - | |
99 | - if (byteCount == buffSize) //バッファ配列の終端 | |
100 | - { | |
101 | - //バッファ配列の拡張と追加読込 | |
102 | - checked { buffSize *= 2; } //Int32.MaxValueを超えたならエラー | |
103 | - byte[] newBuff = new byte[buffSize]; | |
104 | - Buffer.BlockCopy(buff, 0, newBuff, 0, byteCount); | |
105 | - accessor.ReadArray<byte>(offset + byteCount, newBuff, byteCount, buffSize - byteCount); | |
106 | - buff = newBuff; | |
107 | - } | |
108 | - } | |
109 | - | |
110 | - //バッファ配列を文字列にデコード | |
111 | - return enc.GetString(buff, 0, byteCount); | |
112 | - } | |
113 | - | |
114 | -#endif | |
115 | - | |
116 | - } | |
117 | -} | |
using System;
using System.Collections.Generic;
using System.Text;
#if MMF_DIC
using System.IO.MemoryMappedFiles;
#endif

namespace NMeCab.Core
{
    /// <summary>
    /// Helpers for decoding NUL-terminated byte strings and for resolving encoding names.
    /// </summary>
    public static class StrUtils
    {
        private const byte Nul = (byte)0;

        /// <summary>
        /// Extracts the NUL-terminated string that starts at the beginning of a byte array.
        /// </summary>
        /// <remarks>
        /// The array may be longer than Int32.MaxValue. If no NUL is present, the string
        /// extends to the end of the array.
        /// </remarks>
        /// <param name="bytes">Byte array.</param>
        /// <param name="enc">Character encoding.</param>
        /// <returns>The decoded string (without the NUL).</returns>
        public static string GetString(byte[] bytes, Encoding enc)
        {
            return StrUtils.GetString(bytes, 0L, enc);
        }

        /// <summary>
        /// Extracts the NUL-terminated string that starts at <paramref name="offset"/> in a byte array.
        /// </summary>
        /// <remarks>
        /// The array may be longer than Int32.MaxValue. The search for the terminator never
        /// leaves the array: if no NUL follows <paramref name="offset"/>, the string extends to
        /// the end of the array.
        /// </remarks>
        /// <param name="bytes">Byte array.</param>
        /// <param name="offset">Index of the first byte of the string.</param>
        /// <param name="enc">Character encoding.</param>
        /// <returns>The decoded string (without the NUL).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="enc"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> is negative or beyond the end of the array.</exception>
        /// <exception cref="OverflowException">The string is longer than Int32.MaxValue bytes.</exception>
        public static unsafe string GetString(byte[] bytes, long offset, Encoding enc)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes");
            if (enc == null)
                throw new ArgumentNullException("enc");

            long limit = bytes.LongLength;
            if (offset < 0L || offset > limit)
                throw new ArgumentOutOfRangeException("offset");

            // Bounded scan: stop at the terminator or at the end of the array, whichever comes first.
            long end = offset;
            while (end < limit && bytes[end] != Nul)
                end++;

            int byteCount = checked((int)(end - offset));
            if (byteCount == 0)
                return string.Empty; // also covers an empty array, whose pinned pointer would be null

            fixed (byte* pBytes = bytes)
                return StrUtils.Decode(pBytes + offset, byteCount, enc);
        }

        /// <summary>
        /// Extracts the NUL-terminated string that starts at <paramref name="bytes"/>.
        /// </summary>
        /// <remarks>
        /// The memory must contain a NUL terminator; this overload has no way to bound the scan.
        /// </remarks>
        /// <param name="bytes">Pointer to the first byte to decode.</param>
        /// <param name="enc">Character encoding.</param>
        /// <returns>The decoded string (without the NUL).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="enc"/> is null.</exception>
        /// <exception cref="OverflowException">The string is longer than Int32.MaxValue bytes.</exception>
        public static unsafe string GetString(byte* bytes, Encoding enc)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes");
            if (enc == null)
                throw new ArgumentNullException("enc");

            // Count the bytes in front of the terminator.
            int byteCount = 0;
            while (bytes[byteCount] != Nul)
            {
                checked { byteCount++; }
            }

            return StrUtils.Decode(bytes, byteCount, enc);
        }

        /// <summary>
        /// Decodes exactly <paramref name="byteCount"/> bytes starting at <paramref name="bytes"/>.
        /// </summary>
        private static unsafe string Decode(byte* bytes, int byteCount, Encoding enc)
        {
            // Nothing to decode; this also avoids pinning a zero-length buffer below,
            // which would hand a null pointer to GetChars.
            if (byteCount == 0)
                return string.Empty;

            // Allocate room for the largest number of characters the bytes can produce.
            char[] buffer = new char[enc.GetMaxCharCount(byteCount)];
            fixed (char* pBuffer = buffer)
            {
                int len = enc.GetChars(bytes, byteCount, pBuffer, buffer.Length);
                return new string(pBuffer, 0, len);
            }
        }

#if MMF_DIC

        /// <summary>
        /// Extracts the NUL-terminated string that starts at <paramref name="offset"/> in a
        /// memory-mapped view.
        /// </summary>
        /// <remarks>
        /// The capacity of the view may exceed Int32.MaxValue. Data is read in chunks, and
        /// the internal buffer doubles each time it fills up without a terminator being found.
        /// </remarks>
        /// <param name="accessor">View to read from.</param>
        /// <param name="offset">Position of the first byte of the string within the view.</param>
        /// <param name="enc">Character encoding.</param>
        /// <param name="buffSize">Initial size of the internal buffer; must be at least 1.</param>
        /// <returns>The decoded string (without the NUL).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="accessor"/> or <paramref name="enc"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="buffSize"/> is less than 1.</exception>
        /// <exception cref="OverflowException">The buffer would have to grow beyond Int32.MaxValue bytes.</exception>
        public static string GetString(MemoryMappedViewAccessor accessor, long offset, Encoding enc,
                                        int buffSize = 128)
        {
            if (accessor == null)
                throw new ArgumentNullException("accessor");
            if (enc == null)
                throw new ArgumentNullException("enc");
            if (buffSize < 1)
                throw new ArgumentOutOfRangeException("buffSize");

            byte[] buff = new byte[buffSize]; // chunk buffer, keeps the number of reads low
            accessor.ReadArray<byte>(offset, buff, 0, buffSize); // initial read

            // Count the bytes in front of the terminator.
            int byteCount = 0;
            while (buff[byteCount] != Nul)
            {
                byteCount++;

                if (byteCount == buffSize) // reached the end of the buffer
                {
                    // Double the buffer and read the additional part behind what is already there.
                    checked { buffSize *= 2; }
                    byte[] newBuff = new byte[buffSize];
                    Buffer.BlockCopy(buff, 0, newBuff, 0, byteCount);
                    accessor.ReadArray<byte>(offset + byteCount, newBuff, byteCount, buffSize - byteCount);
                    buff = newBuff;
                }
            }

            return enc.GetString(buff, 0, byteCount);
        }

#endif

        /// <summary>
        /// Returns the encoding for <paramref name="name"/>, additionally accepting names
        /// that Encoding.GetEncoding itself is not asked to resolve (currently "UTF8", in any letter case).
        /// </summary>
        /// <param name="name">Encoding name, for example as stored in a dictionary header.</param>
        /// <returns>The matching encoding.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
        public static Encoding GetEncoding(string name)
        {
            if (name == null)
                throw new ArgumentNullException("name");

            // Invariant casing: the match must not depend on the current thread culture.
            switch (name.ToUpperInvariant())
            {
                case "UTF8":
                    return Encoding.UTF8;
                default:
                    return Encoding.GetEncoding(name);
            }
        }
    }
}