• R/O
  • SSH
  • HTTPS

tortoisesvn: Commit


Commit MetaInfo

Revision: 5293 (tree)
Zeit: 2005-12-30 01:35:12
Autor: stefankueng

Log Message

Improve the recognition algorithm for UTF8 sequences in text files.
Also add two testfiles for it.

Ändern Zusammenfassung

Diff

--- trunk/src/TortoiseMerge/FileTextLines.cpp (revision 5292)
+++ trunk/src/TortoiseMerge/FileTextLines.cpp (revision 5293)
@@ -41,7 +41,7 @@
4141 {
4242 if (0x0000 == *pVal++)
4343 return CFileTextLines::BINARY;
44- } // for (int i=0; i<cb; i=i+2)
44+ }
4545 pVal = (UINT16 *)pBuffer;
4646 if (*pVal == 0xFEFF)
4747 return CFileTextLines::UNICODE_LE;
@@ -51,7 +51,53 @@
5151 {
5252 if (*pVal2 == 0xBF)
5353 return CFileTextLines::UTF8;
54- } // if (*pVal == 0xBBEF)
54+ }
55+ // check for illegal UTF8 chars
56+ pVal2 = (UINT8 *)pBuffer;
57+ for (int i=0; i<cb; ++i)
58+ {
59+ if ((*pVal2 == 0xC0)||(*pVal2 == 0xC1)||(*pVal2 >= 0xF5))
60+ return CFileTextLines::ASCII;
61+ pVal2++;
62+ }
63+ pVal2 = (UINT8 *)pBuffer;
64+ bool bUTF8 = false;
65+ for (int i=0; i<cb; ++i)
66+ {
67+ if ((*pVal2 & 0xE0)==0xC0)
68+ {
69+ pVal2++;i++;
70+ if ((*pVal2 & 0xC0)!=0x80)
71+ return CFileTextLines::ASCII;
72+ bUTF8 = true;
73+ }
74+ if ((*pVal2 & 0xF0)==0xE0)
75+ {
76+ pVal2++;i++;
77+ if ((*pVal2 & 0xC0)!=0x80)
78+ return CFileTextLines::ASCII;
79+ pVal2++;i++;
80+ if ((*pVal2 & 0xC0)!=0x80)
81+ return CFileTextLines::ASCII;
82+ bUTF8 = true;
83+ }
84+ if ((*pVal2 & 0xF8)==0xF0)
85+ {
86+ pVal2++;i++;
87+ if ((*pVal2 & 0xC0)!=0x80)
88+ return CFileTextLines::ASCII;
89+ pVal2++;i++;
90+ if ((*pVal2 & 0xC0)!=0x80)
91+ return CFileTextLines::ASCII;
92+ pVal2++;i++;
93+ if ((*pVal2 & 0xC0)!=0x80)
94+ return CFileTextLines::ASCII;
95+ bUTF8 = true;
96+ }
97+ pVal2++;
98+ }
99+ if (bUTF8)
100+ return CFileTextLines::UTF8;
55101 return CFileTextLines::ASCII;
56102 }
57103
--- trunk/test/mergediff/utf8bom.txt (nonexistent)
+++ trunk/test/mergediff/utf8bom.txt (revision 5293)
@@ -0,0 +1,3 @@
1+aeiou äöüÄÖÜ
2+
3+ÄÖÜöäü aeiou
\ No newline at end of file
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
--- trunk/test/mergediff/utf8nobom.txt (nonexistent)
+++ trunk/test/mergediff/utf8nobom.txt (revision 5293)
@@ -0,0 +1,3 @@
1+aeiou äöüÄÖÜ
2+
3+ÄÖÜöäü aeiou
\ No newline at end of file
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Show on old repository browser