Revision | fcaf12ad856698c3111960910928cd9a699584c5 (tree) |
---|---|
Zeit | 2017-09-24 14:02:49 |
Autor | umorigu <umorigu@gmai...> |
Committer | umorigu |
Implement hiragana / katakana / hankaku-katakana search
@@ -11,6 +11,7 @@ window.addEventListener && window.addEventListener('DOMContentLoaded', function( | ||
11 | 11 | var maxResultLines = 20; |
12 | 12 | var minBlockLines = 5; |
13 | 13 | var minSearchWaitMilliseconds = 100; |
14 | + var kanaMap = null; | |
14 | 15 | function escapeHTML (s) { |
15 | 16 | if(typeof s !== 'string') { |
16 | 17 | s = '' + s; |
@@ -173,16 +174,70 @@ window.addEventListener && window.addEventListener('DOMContentLoaded', function( | ||
173 | 174 | }, interval); |
174 | 175 | } |
175 | 176 | } |
/**
 * Lazily builds `kanaMap`, a lookup from the NFKC-normalized form of a
 * half-width character (U+FF61..U+FF9F) to its half-width spelling, e.g.
 * 'カ' -> 'ｶ', 'ガ' -> 'ｶﾞ', 'パ' -> 'ﾊﾟ'.
 *
 * The map is built only once; later calls return immediately. When
 * String.prototype.normalize is unavailable, `kanaMap` is set to an empty
 * object so lookups simply find no half-width variant instead of throwing.
 */
function prepareKanaMap() {
  if (kanaMap !== null) return;
  var map = {};
  // normalize() is an ES2015 addition that older browsers lack. textToRegex
  // already feature-checks it, so degrade here the same way rather than
  // raising a TypeError on the first search.
  if (typeof String.prototype.normalize !== 'function') {
    kanaMap = map;
    return;
  }
  var dakuten = '\uFF9E'; // half-width voiced sound mark
  var maru = '\uFF9F'; // half-width semi-voiced sound mark
  for (var c = 0xFF61; c <= 0xFF9F; c++) {
    var han = String.fromCharCode(c);
    var zen = han.normalize('NFKC');
    map[zen] = han;
    // A base + mark pair is registered only when NFKC composes it into a
    // single character, i.e. the voiced form really exists.
    var hanDaku = han + dakuten;
    var zenDaku = hanDaku.normalize('NFKC');
    if (zenDaku.length === 1) { // + dakuten OK
      map[zenDaku] = hanDaku;
    }
    var hanMaru = han + maru;
    var zenMaru = hanMaru.normalize('NFKC');
    if (zenMaru.length === 1) { // + maru (handakuten) OK
      map[zenMaru] = hanMaru;
    }
  }
  kanaMap = map;
}
/**
 * Builds the alternation for one kana: hiragana, full-width katakana and,
 * when `kanaMap` has an entry for the katakana, its half-width spelling.
 */
function kanaAlternation(hiragana, katakana) {
  var alt = '(?:' + hiragana + '|' + katakana;
  if (kanaMap[katakana]) {
    alt += '|' + kanaMap[katakana];
  }
  return alt + ')';
}

/**
 * Converts a whitespace-separated search string into a global,
 * case-insensitive RegExp with one capturing group per word, joined by '|'.
 *
 * Each word is NFKC-normalized when String.prototype.normalize exists.
 * Regex metacharacters are escaped, and every hiragana (U+3041..U+3096) or
 * katakana (U+30A1..U+30F6) character is expanded so it matches its
 * hiragana, katakana and half-width katakana forms.
 *
 * @param {string} searchText raw text typed by the user
 * @returns {RegExp} pattern built with the 'ig' flags
 */
function textToRegex(searchText) {
  // 1:Symbol 2:Katakana 3:Hiragana
  var regRep = /([\\^$.*+?()[\]{}|])|([\u30a1-\u30f6])|([\u3041-\u3096])/g;
  var words = searchText.replace(/^\s+|\s+$/g, '').split(/\s+/);
  var groups = [];
  prepareKanaMap();
  for (var i = 0; i < words.length; i++) {
    var word = words[i];
    if (word.normalize) {
      word = word.normalize('NFKC');
    }
    var pattern = word.replace(regRep, function (m, symbol, katakana, hiragana) {
      if (symbol) {
        // Symbol - escape with a preceding backslash
        return '\\' + symbol;
      }
      if (katakana) {
        // The hiragana block sits 0x60 code points below the katakana block.
        return kanaAlternation(
          String.fromCharCode(katakana.charCodeAt(0) - 0x60), katakana);
      }
      if (hiragana) {
        return kanaAlternation(
          hiragana, String.fromCharCode(hiragana.charCodeAt(0) + 0x60));
      }
      return m;
    });
    groups.push('(' + pattern + ')');
  }
  return new RegExp(groups.join('|'), 'ig');
}