• R/O
  • HTTP
  • SSH
  • HTTPS

NMeCabRepo2: Commit


Commit MetaInfo

Revision: 68e7fe21146876cfa7e30d18fa589637cf8c7ad2 (tree)
Zeit: 2019-09-13 11:12:34
Autor: komutan <komutan@jp>
Committer: komutan

Log Message

MMFへのアクセスをアンマネージドポインタ経由として高速化

Änderungszusammenfassung

Diff

--- a/src/LibNMeCab/Core/CharInfo.cs
+++ b/src/LibNMeCab/Core/CharInfo.cs
@@ -10,9 +10,9 @@ namespace NMeCab.Core
1010 {
1111 public struct CharInfo
1212 {
13- #region Const/Field/Property
13+ #region Const/Field/Property
1414
15- private readonly uint bits;
15+ private uint bits;
1616
1717 /// <summary>
1818 /// 互換カテゴリ
--- a/src/LibNMeCab/Core/CharProperty.cs
+++ b/src/LibNMeCab/Core/CharProperty.cs
@@ -32,8 +32,8 @@ namespace NMeCab.Core
3232 {
3333 string fileName = Path.Combine(dicDir, CharPropertyFile);
3434
35- using (FileStream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
36- using (BinaryReader reader = new BinaryReader(stream))
35+ using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
36+ using (var reader = new BinaryReader(stream))
3737 {
3838 this.Open(reader, fileName);
3939 }
--- a/src/LibNMeCab/Core/Connector.cs
+++ b/src/LibNMeCab/Core/Connector.cs
@@ -20,7 +20,8 @@ namespace NMeCab.Core
2020
2121 #if MMF_MTX
2222 private MemoryMappedFile mmf;
23- private MemoryMappedViewAccessor matrix;
23+ private MemoryMappedViewAccessor mmva;
24+ private unsafe short* matrix;
2425 #else
2526 private short[] matrix;
2627 #endif
@@ -40,36 +41,33 @@ namespace NMeCab.Core
4041 }
4142
4243 #if MMF_MTX
43-
44- public void Open(string fileName)
44+ public unsafe void Open(string fileName)
4545 {
46- //MMFインスタンスを生成するが、後でDisposeするために保持しておく
47- this.mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open,
48- null, 0L, MemoryMappedFileAccess.Read);
49- this.Open(this.mmf);
50- }
46+ this.mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open, null, 0L, MemoryMappedFileAccess.Read);
47+ this.mmva = this.mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read);
5148
52- public void Open(MemoryMappedFile mmf)
53- {
54- using (MemoryMappedViewStream stream = mmf.CreateViewStream(
55- 0L, 0L, MemoryMappedFileAccess.Read))
56- using (BinaryReader reader = new BinaryReader(stream))
49+ byte* ptr = null;
50+ this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
51+
52+ using (var stream = mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
53+ using (var reader = new BinaryReader(stream))
5754 {
5855 this.LSize = reader.ReadUInt16();
5956 this.RSize = reader.ReadUInt16();
6057
61- long offset = stream.Position;
62- long size = this.LSize * this.RSize * sizeof(short);
63- this.matrix = mmf.CreateViewAccessor(offset, size, MemoryMappedFileAccess.Read);
58+ long fSize = stream.Position + sizeof(short) * this.LSize * this.RSize;
59+ if (this.mmva.Capacity < fSize)
60+ throw new MeCabInvalidFileException("file size is invalid", fileName);
61+
62+ ptr += stream.Position;
63+ this.matrix = (short*)ptr;
6464 }
6565 }
66-
6766 #else
68-
6967 public void Open(string fileName)
7068 {
71- using (FileStream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
72- using (BinaryReader reader = new BinaryReader(stream))
69+ using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
70+ using (var reader = new BinaryReader(stream))
7371 {
7472 this.Open(reader, fileName);
7573 }
@@ -89,22 +87,16 @@ namespace NMeCab.Core
8987 if (reader.BaseStream.ReadByte() != -1)
9088 throw new MeCabInvalidFileException("file size is invalid", fileName);
9189 }
92-
9390 #endif
9491
9592 #endregion
9693
9794 #region Cost
9895
99- public int Cost(MeCabNode lNode, MeCabNode rNode)
96+ public unsafe int Cost(MeCabNode lNode, MeCabNode rNode)
10097 {
10198 int pos = lNode.RCAttr + this.LSize * rNode.LCAttr;
102-
103-#if MMF_MTX
104- return this.matrix.ReadInt16(pos * sizeof(short)) + rNode.WCost;
105-#else
10699 return this.matrix[pos] + rNode.WCost;
107-#endif
108100 }
109101
110102 #endregion
@@ -129,8 +121,14 @@ namespace NMeCab.Core
129121 if (disposing)
130122 {
131123 #if MMF_MTX
132- if (this.mmf != null) this.mmf.Dispose();
133- if (this.matrix != null) this.matrix.Dispose();
124+ if (this.mmva != null)
125+ {
126+ this.mmva.SafeMemoryMappedViewHandle.ReleasePointer();
127+ this.mmva.Dispose();
128+ }
129+
130+ if (this.mmf != null)
131+ this.mmf.Dispose();
134132 #endif
135133 }
136134
--- a/src/LibNMeCab/Core/DoubleArray.cs
+++ b/src/LibNMeCab/Core/DoubleArray.cs
@@ -5,9 +5,8 @@
55 using System;
66 using System.Collections.Generic;
77 using System.Text;
8+#if !MMF_DIC
89 using System.IO;
9-#if MMF_DIC
10-using System.IO.MemoryMappedFiles;
1110 #endif
1211
1312 namespace NMeCab.Core
@@ -15,37 +14,30 @@ namespace NMeCab.Core
1514 /// <summary>
1615 /// Double-Array Trie の実装
1716 /// </summary>
18- public class DoubleArray : IDisposable
17+ public class DoubleArray
1918 {
2019 #region Array
2120
2221 private struct Unit
2322 {
24- public readonly int Base;
25- public readonly uint Check;
26-
27- public Unit(int b, uint c)
28- {
29- this.Base = b;
30- this.Check = c;
31- }
23+#pragma warning disable 0649
24+ public int Base;
25+ public uint Check;
26+#pragma warning restore 0649
3227 }
3328
3429 public const int UnitSize = sizeof(int) + sizeof(uint);
3530
3631 #if MMF_DIC
3732
38- private MemoryMappedViewAccessor accessor;
33+ private unsafe Unit* array;
3934
4035 public int Size
4136 {
42- get { return (int)(this.accessor.Capacity) / UnitSize; }
37+ get { return this.TotalSize / UnitSize; }
4338 }
4439
45- public int TotalSize
46- {
47- get { return (int)(this.accessor.Capacity); }
48- }
40+ public int TotalSize { get; private set; }
4941
5042 #else
5143
@@ -69,20 +61,25 @@ namespace NMeCab.Core
6961
7062 #if MMF_DIC
7163
72- public void Open(MemoryMappedFile mmf, long offset, long size)
64+ public unsafe void Open(byte* ptr, int size)
7365 {
74- this.accessor = mmf.CreateViewAccessor(offset, size, MemoryMappedFileAccess.Read);
66+ this.array = (Unit*)ptr;
67+ this.TotalSize = size;
7568 }
7669
7770 #else
7871
79- public void Open(BinaryReader reader, uint size)
72+ public void Open(BinaryReader reader, int size)
8073 {
8174 this.array = new Unit[size / UnitSize];
8275
8376 for (int i = 0; i < array.Length; i++)
8477 {
85- this.array[i] = new Unit(reader.ReadInt32(), reader.ReadUInt32());
78+ this.array[i] = new Unit()
79+ {
80+ Base = reader.ReadInt32(),
81+ Check = reader.ReadUInt32()
82+ };
8683 }
8784 }
8885
@@ -95,71 +92,60 @@ namespace NMeCab.Core
9592 public struct ResultPair
9693 {
9794 public int Value;
98-
9995 public int Length;
100-
101- public ResultPair(int r, int t)
102- {
103- this.Value = r;
104- this.Length = t;
105- }
106- }
107-
108- public unsafe void ExactMatchSearch(byte* key, ResultPair* result, int len, int nodePos)
109- {
110- *result = this.ExactMatchSearch(key, len, nodePos);
11196 }
11297
11398 public unsafe ResultPair ExactMatchSearch(byte* key, int len, int nodePos)
11499 {
115- int b = this.ReadBase(nodePos);
116- Unit p;
100+ int b = this.array[nodePos].Base;
101+ int p;
117102
118103 for (int i = 0; i < len; i++)
119104 {
120- this.ReadUnit(b + key[i] + 1, out p);
121- if (b == p.Check)
105+ p = b + key[i] + 1;
106+ if (b == this.array[p].Check)
122107 {
123- b = p.Base;
108+ b = this.array[p].Base;
124109 }
125110 else
126111 {
127- return new ResultPair(-1, 0);
112+ return new ResultPair() { Value = -1, Length = 0 };
128113 }
129114 }
130115
131- this.ReadUnit(b, out p);
132- int n = p.Base;
133- if (b == p.Check && n < 0)
116+ p = b;
117+ int n = this.array[b].Base;
118+ if (b == this.array[p].Check && n < 0)
134119 {
135- return new ResultPair(-n - 1, len);
120+ return new ResultPair() { Value = -n - 1, Length = 0 };
136121 }
137122
138- return new ResultPair(-1, 0);
123+ return new ResultPair() { Value = -1, Length = 0 };
139124 }
140125
141126 public unsafe int CommonPrefixSearch(byte* key, ResultPair* result, int resultLen, int len, int nodePos = 0)
142127 {
143- int b = this.ReadBase(nodePos);
128+ int b = this.array[nodePos].Base;
144129 int num = 0;
145130 int n;
146- Unit p;
131+ int p;
147132
148133 for (int i = 0; i < len; i++)
149134 {
150- this.ReadUnit(b, out p);
151- n = p.Base;
135+ p = b;
136+ n = this.array[p].Base;
152137
153- if (b == p.Check && n < 0)
138+ if (b == this.array[p].Check && n < 0)
154139 {
155- if (num < resultLen) result[num] = new ResultPair(-n - 1, i);
140+ if (num < resultLen)
141+ result[num] = new ResultPair() { Value = -n - 1, Length = i };
156142 num++;
157143 }
158144
159- this.ReadUnit(b + key[i] + 1, out p);
160- if (b == p.Check)
145+ p = b + key[i] + 1;
146+ if (b == this.array[p].Check)
161147 {
162- b = p.Base;
148+ b = this.array[p].Base;
163149 }
164150 else
165151 {
@@ -167,69 +153,19 @@ namespace NMeCab.Core
167153 }
168154 }
169155
170- this.ReadUnit(b, out p);
171- n = p.Base;
156+ p = b;
157+ n = this.array[p].Base;
172158
173- if (b == p.Check && n < 0)
159+ if (b == this.array[p].Check && n < 0)
174160 {
175- if (num < resultLen) result[num] = new ResultPair(-n - 1, len);
161+ if (num < resultLen)
162+ result[num] = new ResultPair() { Value = -n - 1, Length = len };
176163 num++;
177164 }
178165
179166 return num;
180167 }
181168
182-
183-
184- private int ReadBase(int pos)
185- {
186-#if MMF_DIC
187- return this.accessor.ReadInt32(pos * UnitSize);
188-#else
189- return this.array[pos].Base;
190-#endif
191- }
192-
193- private void ReadUnit(int pos, out Unit unit)
194- {
195-#if MMF_DIC
196- this.accessor.Read<Unit>(pos * UnitSize, out unit);
197-#else
198- unit = this.array[pos];
199-#endif
200- }
201-
202- #endregion
203-
204- #region Dispose
205-
206- private bool disposed;
207-
208- public void Dispose()
209- {
210- this.Dispose(true);
211- GC.SuppressFinalize(this);
212- }
213-
214- protected virtual void Dispose(bool disposing)
215- {
216- if (disposed) return;
217-
218- if (disposing)
219- {
220-#if MMF_DIC
221- if (this.accessor != null) this.accessor.Dispose();
222-#endif
223- }
224-
225- this.disposed = true;
226- }
227-
228- ~DoubleArray()
229- {
230- this.Dispose(false);
231- }
232-
233169 #endregion
234170 }
235171 }
--- a/src/LibNMeCab/Core/MeCabDictionary.cs
+++ b/src/LibNMeCab/Core/MeCabDictionary.cs
@@ -21,14 +21,15 @@ namespace NMeCab.Core
2121
2222 #if MMF_DIC
2323 private MemoryMappedFile mmf;
24- private MemoryMappedViewAccessor tokens;
25- private MemoryMappedViewAccessor features;
24+ private MemoryMappedViewAccessor mmva;
25+ private unsafe Token* tokens;
26+ private unsafe byte* features;
2627 #else
2728 private Token[] tokens;
2829 private byte[] features;
2930 #endif
3031
31- private DoubleArray da = new DoubleArray();
32+ private readonly DoubleArray da = new DoubleArray();
3233
3334 private Encoding encoding;
3435
@@ -73,28 +74,26 @@ namespace NMeCab.Core
7374
7475 #if MMF_DIC
7576
76- public void Open(string filePath)
77+ public unsafe void Open(string fileName)
7778 {
78- this.mmf = MemoryMappedFile.CreateFromFile(filePath, FileMode.Open,
79- null, 0L, MemoryMappedFileAccess.Read);
80- this.Open(this.mmf, filePath);
81- }
79+ this.FileName = fileName;
8280
83- public void Open(MemoryMappedFile mmf, string filePath = null)
84- {
85- this.FileName = filePath;
81+ this.mmf = MemoryMappedFile.CreateFromFile(fileName, FileMode.Open, null, 0L, MemoryMappedFileAccess.Read);
82+ this.mmva = mmf.CreateViewAccessor(0L, 0L, MemoryMappedFileAccess.Read);
8683
87- using (MemoryMappedViewStream stream = mmf.CreateViewStream(
88- 0L, 0L, MemoryMappedFileAccess.Read))
89- using (BinaryReader reader = new BinaryReader(stream))
84+ byte* ptr = null;
85+ this.mmva.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
86+
87+ using (var stream = this.mmf.CreateViewStream(0L, 0L, MemoryMappedFileAccess.Read))
88+ using (var reader = new BinaryReader(stream))
9089 {
9190 uint magic = reader.ReadUInt32();
92- if (stream.CanSeek && stream.Length < (magic ^ DictionaryMagicID)) //正確なサイズ取得ができないので不等号で代用
93- throw new MeCabInvalidFileException("dictionary file is broken", filePath);
91+ if (this.mmva.Capacity < (magic ^ DictionaryMagicID))
92+ throw new MeCabInvalidFileException("dictionary file is broken", fileName);
9493
9594 this.Version = reader.ReadUInt32();
9695 if (this.Version != DicVersion)
97- throw new MeCabInvalidFileException("incompatible version", filePath);
96+ throw new MeCabInvalidFileException("incompatible version", fileName);
9897
9998 this.Type = (DictionaryType)reader.ReadUInt32();
10099 this.LexSize = reader.ReadUInt32();
@@ -108,22 +107,25 @@ namespace NMeCab.Core
108107 string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
109108 this.encoding = StrUtils.GetEncoding(charSet);
110109
111- long offset = stream.Position;
112- this.da.Open(mmf, offset, dSize);
113- offset += dSize;
114- this.tokens = mmf.CreateViewAccessor(offset, tSize, MemoryMappedFileAccess.Read);
115- offset += tSize;
116- this.features = mmf.CreateViewAccessor(offset, fSize, MemoryMappedFileAccess.Read);
110+ ptr += stream.Position;
111+
112+ this.da.Open(ptr, (int)dSize);
113+ ptr += dSize;
114+
115+ this.tokens = (Token*)ptr;
116+ ptr += tSize;
117+
118+ this.features = ptr;
117119 }
118120 }
119121
120122 #else
121123
122- public void Open(string filePath)
124+ public void Open(string fileName)
123125 {
124- this.FileName = filePath;
125-
126- using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
126+ this.FileName = fileName;
127+
128+ using (FileStream fileStream = new FileStream(fileName, FileMode.Open, FileAccess.Read))
127129 using (BinaryReader reader = new BinaryReader(fileStream))
128130 {
129131 this.Open(reader);
@@ -153,7 +155,7 @@ namespace NMeCab.Core
153155 string charSet = StrUtils.GetString(reader.ReadBytes(32), Encoding.ASCII);
154156 this.encoding = StrUtils.GetEncoding(charSet);
155157
156- this.da.Open(reader, dSize);
158+ this.da.Open(reader, (int)dSize);
157159
158160 this.tokens = new Token[tSize / sizeof(Token)];
159161 for (int i = 0; i < this.tokens.Length; i++)
@@ -187,7 +189,7 @@ namespace NMeCab.Core
187189 byte* bytes = stackalloc byte[maxByteCount];
188190 int bytesLen = this.encoding.GetBytes(key, len, bytes, maxByteCount);
189191
190- DoubleArray.ResultPair result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos);
192+ var result = this.da.ExactMatchSearch(bytes, bytesLen, nodePos);
191193
192194 //文字数をデコードしたものに変換
193195 result.Length = this.encoding.GetCharCount(bytes, result.Length);
@@ -218,19 +220,49 @@ namespace NMeCab.Core
218220
219221 #region Get Infomation
220222
221- public unsafe Token[] GetToken(DoubleArray.ResultPair n)
223+ public int GetTokenSize(int value)
222224 {
223- Token[] dist = new Token[0xFF & n.Value];
224- int tokenPos = n.Value >> 8;
225+ return 0xFF & value;
226+ }
227+
228+ public int GetTokenPos(int value)
229+ {
230+ return value >> 8;
231+ }
232+
225233 #if MMF_DIC
226- this.tokens.ReadArray<Token>(tokenPos * sizeof(Token), dist, 0, dist.Length);
234+ public unsafe Token* GetTokens(int value)
235+ {
236+ return this.tokens + this.GetTokenPos(value);
237+ }
238+
239+ public unsafe Token[] GetTokensArray(int value)
240+ {
241+ var ret = new Token[this.GetTokenSize(value)];
242+ var t = this.GetTokens(value);
243+
244+ for (int i = 0; i < ret.Length; i++)
245+ {
246+ ret[i] = t[i];
247+ }
248+
249+ return ret;
250+ }
227251 #else
228- Array.Copy(this.tokens, tokenPos, dist, 0, dist.Length);
229-#endif
230- return dist;
252+ public ArraySegment<Token> GetTokens(int value)
253+ {
254+ return new ArraySegment<Token>(this.tokens, this.GetTokenPos(value), this.GetTokenSize(value));
255+ }
256+
257+ public Token[] GetTokensArray(int value)
258+ {
259+ var ret = new Token[this.GetTokenSize(value)];
260+ Array.Copy(this.tokens, this.GetTokenPos(value), ret, 0, ret.Length);
261+ return ret;
231262 }
263+#endif
232264
233- public string GetFeature(uint featurePos)
265+ public unsafe string GetFeature(uint featurePos)
234266 {
235267 return StrUtils.GetString(this.features, (long)featurePos, this.encoding);
236268 }
@@ -268,11 +300,15 @@ namespace NMeCab.Core
268300
269301 if (disposing)
270302 {
271- if (this.da != null) this.da.Dispose();
272303 #if MMF_DIC
273- if (this.mmf != null) this.mmf.Dispose();
274- if (this.tokens != null) this.tokens.Dispose();
275- if (this.features != null) this.features.Dispose();
304+ if (this.mmva != null)
305+ {
306+ this.mmva.SafeMemoryMappedViewHandle.ReleasePointer();
307+ this.mmva.Dispose();
308+ }
309+
310+ if (this.mmf != null)
311+ this.mmf.Dispose();
276312 #endif
277313 }
278314
--- a/src/LibNMeCab/Core/StrUtils.cs
+++ b/src/LibNMeCab/Core/StrUtils.cs
@@ -38,7 +38,7 @@ namespace NMeCab.Core
3838 public unsafe static string GetString(byte[] bytes, long offset, Encoding enc)
3939 {
4040 fixed (byte* pBytes = bytes)
41- return StrUtils.GetString(pBytes + offset, enc);
41+ return StrUtils.GetString(pBytes, offset, enc);
4242 }
4343
4444 /// <summary>
@@ -47,6 +47,20 @@ namespace NMeCab.Core
4747 /// <remarks>
4848 /// バイト配列の長さはInt32.MaxValueを超えていても良い。
4949 /// </remarks>
50+ /// <param name="bytes">デコードするバイトへのポインタ</param>
51+ /// <param name="offset">オフセット位置</param>
52+ /// <param name="enc">文字エンコーディング</param>
53+ /// <returns>文字列(\0は含まない)</returns>
54+ public unsafe static string GetString(byte* bytes, long offset, Encoding enc)
55+ {
56+ return StrUtils.GetString(bytes + offset, enc);
57+ }
58+ /// <summary>
59+ /// バイト配列の中から終端が\0で表された文字列を取り出す。
60+ /// </summary>
61+ /// <remarks>
62+ /// バイト配列の長さはInt32.MaxValueを超えていても良い。
63+ /// </remarks>
5064 /// <param name="bytes">デコードする最初のバイトへのポインタ</param>
5165 /// <param name="enc">文字エンコーディング</param>
5266 /// <returns>文字列(\0は含まない)</returns>
@@ -54,12 +68,13 @@ namespace NMeCab.Core
5468 {
5569 //バイト長のカウント
5670 int byteCount = 0;
57- while (*bytes != Nul) //終端\0に到達するまでシーク
71+ while (bytes[byteCount] != Nul) //終端\0に到達するまでシーク
5872 {
5973 checked { byteCount++; } //文字列のバイト長がInt32.MaxValueを超えたならエラー
60- bytes++;
6174 }
62- bytes -= byteCount;
75+
76+ if (byteCount == 0)
77+ return "";
6378
6479 //生成されうる最大文字数のバッファを確保
6580 int maxCharCount = enc.GetMaxCharCount(byteCount);
@@ -71,48 +86,6 @@ namespace NMeCab.Core
7186 }
7287 }
7388
74-#if MMF_DIC
75-
76- /// <summary>
77- /// MemoryMappedViewAccessorから終端が\0で表された文字列を取り出す。
78- /// </summary>
79- /// <remarks>
80- /// MemoryMappedViewAccessorの容量はInt32.MaxValueを超えていても良い。
81- /// </remarks>
82- /// <param name="accessor">MemoryMappedViewAccessor</param>
83- /// <param name="index">オフセット位置</param>
84- /// <param name="enc">文字エンコーディング</param>
85- /// <param name="buffSize">内部で使用するバッファの初期サイズ</param>
86- /// <returns>文字列(\0は含まない)</returns>
87- public static string GetString(MemoryMappedViewAccessor accessor, long offset, Encoding enc,
88- int buffSize = 128)
89- {
90- byte[] buff = new byte[buffSize]; //IO回数削減のためのバッファ配列
91- accessor.ReadArray<byte>(offset, buff, 0, buffSize); //初期読込
92-
93- //バイト長のカウント
94- int byteCount = 0;
95- while (buff[byteCount] != Nul) //終端\0に到達するまでシーク
96- {
97- byteCount++;
98-
99- if (byteCount == buffSize) //バッファ配列の終端
100- {
101- //バッファ配列の拡張と追加読込
102- checked { buffSize *= 2; } //Int32.MaxValueを超えたならエラー
103- byte[] newBuff = new byte[buffSize];
104- Buffer.BlockCopy(buff, 0, newBuff, 0, byteCount);
105- accessor.ReadArray<byte>(offset + byteCount, newBuff, byteCount, buffSize - byteCount);
106- buff = newBuff;
107- }
108- }
109-
110- //バッファ配列を文字列にデコード
111- return enc.GetString(buff, 0, byteCount);
112- }
113-
114-#endif
115-
11689 /// <summary>
11790 /// 指定の名前に対応するエンコーディングを取得する(.NET FWが対応していない名前にもアドホックに対応)
11891 /// </summary>
--- a/src/LibNMeCab/Core/Token.cs
+++ b/src/LibNMeCab/Core/Token.cs
@@ -16,32 +16,32 @@ namespace NMeCab.Core
1616 /// <summary>
1717 /// 右文脈 id
1818 /// </summary>
19- public ushort LcAttr { get; set; }
19+ public ushort LcAttr;
2020
2121 /// <summary>
2222 /// 左文脈 id
2323 /// </summary>
24- public ushort RcAttr { get; set; }
24+ public ushort RcAttr;
2525
2626 /// <summary>
2727 /// 形態素 ID
2828 /// </summary>
29- public ushort PosId { get; set; }
29+ public ushort PosId;
3030
3131 /// <summary>
3232 /// 単語生起コスト
3333 /// </summary>
34- public short WCost { get; set; }
34+ public short WCost;
3535
3636 /// <summary>
3737 /// 素性情報の位置
3838 /// </summary>
39- public uint Feature { get; set; }
39+ public uint Feature;
4040
4141 /// <summary>
4242 /// reserved for noun compound
4343 /// </summary>
44- public uint Compound { get; set; }
44+ public uint Compound;
4545
4646 #endregion
4747
--- a/src/LibNMeCab/Core/Tokenizer.cs
+++ b/src/LibNMeCab/Core/Tokenizer.cs
@@ -55,7 +55,7 @@ namespace NMeCab.Core
5555 if (this.unkDic.Type != DictionaryType.Unk)
5656 throw new MeCabInvalidFileException("not a unk dictionary", this.unkDic.FileName);
5757
58- MeCabDictionary sysDic = new MeCabDictionary();
58+ var sysDic = new MeCabDictionary();
5959 sysDic.Open(Path.Combine(prefix, SysDicFile));
6060 if (sysDic.Type != DictionaryType.Sys)
6161 throw new MeCabInvalidFileException("not a system dictionary", sysDic.FileName);
@@ -63,7 +63,7 @@ namespace NMeCab.Core
6363
6464 for (int i = 0; i < param.UserDic.Length; i++)
6565 {
66- MeCabDictionary d = new MeCabDictionary();
66+ var d = new MeCabDictionary();
6767 d.Open(Path.Combine(prefix, param.UserDic[i]));
6868 if (d.Type != DictionaryType.Usr)
6969 throw new MeCabInvalidFileException("not a user dictionary", d.FileName);
@@ -76,10 +76,11 @@ namespace NMeCab.Core
7676 for (int i = 0; i < this.unkTokens.Length; i++)
7777 {
7878 string key = this.property.Name(i);
79- DoubleArray.ResultPair n = this.unkDic.ExactMatchSearch(key);
79+ var n = this.unkDic.ExactMatchSearch(key);
8080 if (n.Value == -1)
8181 throw new MeCabInvalidFileException("cannot find UNK category: " + key, this.unkDic.FileName);
82- this.unkTokens[i] = this.unkDic.GetToken(n);
82+
83+ this.unkTokens[i] = this.unkDic.GetTokensArray(n.Value);
8384 }
8485
8586 this.space = this.property.GetCharInfo(' ');
@@ -111,28 +112,34 @@ namespace NMeCab.Core
111112 if (end - begin > ushort.MaxValue) end = begin + ushort.MaxValue;
112113 char* begin2 = property.SeekToOtherType(begin, end, this.space, &cInfo, &cLen);
113114
114- DoubleArray.ResultPair* daResults = stackalloc DoubleArray.ResultPair[DAResultSize];
115+ var daResults = stackalloc DoubleArray.ResultPair[DAResultSize];
115116
116117 foreach (MeCabDictionary it in this.dic)
117118 {
118119 int n = it.CommonPrefixSearch(begin2, (int)(end - begin2), daResults, DAResultSize);
119-
120120 for (int i = 0; i < n; i++)
121121 {
122- Token[] token = it.GetToken(daResults[i]);
123- for (int j = 0; j < token.Length; j++)
122+#if MMF_DIC
123+ var tokenSize = it.GetTokenSize(daResults->Value);
124+ var tokens = it.GetTokens(daResults->Value);
125+ for (int j = 0; j < tokenSize; j++)
126+#else
127+ var seg = it.GetTokens(daResults->Value);
128+ var tokens = seg.Array;
129+ for (int j = seg.Offset; j < seg.Offset + seg.Count; j++)
130+#endif
124131 {
125- MeCabNode newNode = this.GetNewNode();
126- this.ReadNodeInfo(it, token[j], newNode);
127- //newNode.Token = token[j];
128- newNode.Length = daResults[i].Length;
129- newNode.RLength = (int)(begin2 - begin) + daResults[i].Length;
130- newNode.Surface = new string(begin2, 0, daResults[i].Length);
132+ var newNode = this.GetNewNode();
133+ this.ReadNodeInfo(it, tokens[j], newNode);
134+ newNode.Length = daResults->Length;
135+ newNode.RLength = (int)(begin2 - begin) + daResults->Length;
136+ newNode.Surface = new string(begin2, 0, newNode.Length);
131137 newNode.Stat = MeCabNodeStat.Nor;
132138 newNode.CharType = cInfo.DefaultType;
133139 newNode.BNext = resultNode;
134140 resultNode = newNode;
135141 }
142+ daResults++;
136143 }
137144 }
138145
@@ -186,18 +193,18 @@ namespace NMeCab.Core
186193 private unsafe void AddUnknown(ref MeCabNode resultNode, CharInfo cInfo,
187194 char* begin, char* begin2, char* begin3)
188195 {
189- Token[] token = this.unkTokens[cInfo.DefaultType];
196+ var token = this.unkTokens[cInfo.DefaultType];
190197 for (int i = 0; i < token.Length; i++)
191198 {
192- MeCabNode newNode = this.GetNewNode();
199+ var newNode = this.GetNewNode();
193200 this.ReadNodeInfo(this.unkDic, token[i], newNode);
194- newNode.CharType = cInfo.DefaultType;
195- newNode.Surface = new string(begin2, 0, (int)(begin3 - begin2));
196201 newNode.Length = (int)(begin3 - begin2);
197202 newNode.RLength = (int)(begin3 - begin);
198- newNode.BNext = resultNode;
203+ newNode.Surface = new string(begin2, 0, newNode.Length);
204+ newNode.CharType = cInfo.DefaultType;
199205 newNode.Stat = MeCabNodeStat.Unk;
200206 if (this.unkFeature != null) newNode.Feature = this.unkFeature;
207+ newNode.BNext = resultNode;
201208 resultNode = newNode;
202209 }
203210 }
@@ -208,7 +215,7 @@ namespace NMeCab.Core
208215
209216 public MeCabNode GetBosNode()
210217 {
211- MeCabNode bosNode = this.GetNewNode();
218+ var bosNode = this.GetNewNode();
212219 bosNode.Surface = BosKey; // dummy
213220 bosNode.Feature = this.bosFeature;
214221 bosNode.IsBest = true;
@@ -218,14 +225,14 @@ namespace NMeCab.Core
218225
219226 public MeCabNode GetEosNode()
220227 {
221- MeCabNode eosNode = this.GetBosNode(); // same
228+ var eosNode = this.GetBosNode(); // same
222229 eosNode.Stat = MeCabNodeStat.Eos;
223230 return eosNode;
224231 }
225232
226233 public MeCabNode GetNewNode()
227234 {
228- MeCabNode node = new MeCabNode();
235+ var node = new MeCabNode();
229236 #if NeedId
230237 node.Id = Tokenizer.id++;
231238 #endif
@@ -251,10 +258,12 @@ namespace NMeCab.Core
251258 if (disposing)
252259 {
253260 if (this.dic != null)
254- foreach (MeCabDictionary d in this.dic)
255- if (d != null) d.Dispose();
261+ foreach (var d in this.dic)
262+ if (d != null)
263+ d.Dispose();
256264
257- if (this.unkDic != null) this.unkDic.Dispose();
265+ if (this.unkDic != null)
266+ this.unkDic.Dispose();
258267 }
259268
260269 this.disposed = true;
Show on old repository browser