• R/O
  • SSH
  • HTTPS

tortoisesvn:


File Info

Rev. 12085
Größe 19,489 Bytes
Zeit 2008-02-10 01:04:50
Autor stefankueng
Log Message

Update to version 1.75 of Scintilla.

Content

// Scintilla source code edit control
/** @file LexBash.cxx
 ** Lexer for Bash.
 **/
// Copyright 2004-2007 by Neil Hodgson <neilh@scintilla.org>
// Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
// The License.txt file describes the conditions under which this software may be distributed.

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>

#include "Platform.h"

#include "PropSet.h"
#include "Accessor.h"
#include "KeyWords.h"
#include "Scintilla.h"
#include "SciLexer.h"

// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
#undef PEDANTIC_OCTAL

// Pseudo-bases kept in the lexer's numBase variable while a number is lexed.
// They are all greater than 64, the largest base getBashNumberBase() returns
// as valid, so they cannot be confused with an explicit NN# base.
#define BASH_BASE_ERROR		65
#define BASH_BASE_DECIMAL	66
#define BASH_BASE_HEX		67
#ifdef PEDANTIC_OCTAL
// Only available when pedantic octal checking is compiled in.
#define BASH_BASE_OCTAL		68
#define BASH_BASE_OCTAL_ERROR	69
#endif

// Size of the buffer allocated for a here-doc delimiter in ColouriseBashDoc.
#define HERE_DELIM_MAX 256

#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif

// Map one character of a Bash number to its digit value:
// '0'-'9' give 0-9, 'a'-'z' give 10-35, 'A'-'Z' give 36-61,
// '@' gives 62 and '_' gives 63.
// Any other character yields BASH_BASE_ERROR.
static inline int translateBashDigit(char ch) {
	switch (ch) {
	case '@':
		return 62;
	case '_':
		return 63;
	default:
		break;
	}
	if ('0' <= ch && ch <= '9')
		return ch - '0';
	if ('a' <= ch && ch <= 'z')
		return 10 + (ch - 'a');
	if ('A' <= ch && ch <= 'Z')
		return 36 + (ch - 'A');
	return BASH_BASE_ERROR;
}

// True when ch is a carriage return or a line feed.
static inline bool isEOLChar(char ch) {
	switch (ch) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}

// Report whether ch is one of the single-letter operators that may follow
// a '-' to form a file test operator (for example -r, -w, -x, -e).
// The NUL character is never an operator; it is rejected explicitly because
// a plain string search would otherwise match the terminator of the letter set.
static bool isSingleCharOp(char ch) {
	if (ch == '\0')
		return false;
	return strchr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", ch) != NULL;
}

// True when ch is one of the punctuation characters this lexer colours as
// an operator.  Note that '*', '#' and '$' are not in the set.
static inline bool isBashOperator(char ch) {
	switch (ch) {
	case '^': case '&': case '\\': case '%':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '>': case ',': case '/': case '<':
	case '?': case '!': case '.': case '~':
	case '@':
		return true;
	default:
		return false;
	}
}

// Colour the word occupying positions start..end as a keyword or identifier.
// At most the first 30 characters of the word are copied and looked up in
// keywords; the range up to end is then coloured SCE_SH_WORD when the lookup
// succeeds and SCE_SH_IDENTIFIER otherwise.  Returns the style that was applied.
static int classifyWordBash(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
	char s[100];
	// Start from an empty string so the lookup below never reads an
	// uninitialised buffer if the copy loop does not execute.
	s[0] = '\0';
	for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
		s[i] = styler[start + i];
		s[i + 1] = '\0';
	}
	char chAttr = SCE_SH_IDENTIFIER;
	if (keywords.InList(s))
		chAttr = SCE_SH_WORD;
	styler.ColourTo(end, chAttr);
	return chAttr;
}

// Read the characters at positions start..end as a decimal number and
// return it as the base of a Bash number.
// BASH_BASE_ERROR is returned when the value exceeds 64 or when the
// range spans more than two characters.
static inline int getBashNumberBase(unsigned int start, unsigned int end, Accessor &styler) {
	const unsigned int span = end - start;
	int value = 0;
	unsigned int offset = 0;
	while (offset < span + 1 && offset < 10) {
		value = value * 10 + (styler[start + offset] - '0');
		offset++;
	}
	const bool tooLarge = value > 64;
	const bool tooLong = span > 1;
	return (tooLarge || tooLong) ? BASH_BASE_ERROR : value;
}

// True when ch cannot be part of a variable name, i.e. it is neither
// alphanumeric nor '$' nor '_'.
static inline bool isEndVar(char ch) {
	// isalnum() is only defined for values representable as unsigned char
	// (or EOF), so convert first; plain char may be negative for bytes >= 0x80.
	const unsigned char uch = static_cast<unsigned char>(ch);
	return !isalnum(uch) && ch != '$' && ch != '_';
}

// True when ch is alphanumeric or an underscore.
static inline bool isNonQuote(char ch) {
	// isalnum() is only defined for values representable as unsigned char
	// (or EOF), so convert first; plain char may be negative for bytes >= 0x80.
	const unsigned char uch = static_cast<unsigned char>(ch);
	return isalnum(uch) || ch == '_';
}

// Compare the document text starting at pos with the NUL-terminated string val.
// Returns false without comparing when pos plus the length of val reaches or
// passes lengthDoc; otherwise returns true only if every character matches.
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
	const int valLength = static_cast<int>(strlen(val));
	if (pos + valLength >= lengthDoc)
		return false;
	for (const char *cursor = val; *cursor != '\0'; ++cursor, ++pos) {
		if (styler[pos] != *cursor)
			return false;
	}
	return true;
}

// Return the closing bracket that pairs with an opening '(', '[', '{' or '<'.
// Every other character is returned unchanged, so it acts as its own closer.
static char opposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case '[':
		return ']';
	case '{':
		return '}';
	case '<':
		return '>';
	default:
		return ch;
	}
}

// Assign Bash styles to the document range [startPos, startPos + length).
//   startPos     - position at which lexing is asked to start; it may be moved
//                  backwards below so that lexing restarts at the beginning of
//                  a here-doc, string, number, identifier or comment
//   length       - number of characters to style from the original startPos
//   initStyle    - style in effect at startPos
//   keywordlists - keywordlists[0] holds the words styled as SCE_SH_WORD
//   styler       - gives access to the document text and receives the styles
// The function is a character-by-character state machine driven by 'state';
// several states jump back to the restartLexer label to re-examine the
// current character after closing the style that preceded it.
// NOTE(review): plain char values are passed to isdigit()/isalnum()/isspace()
// throughout this function - confirm the behaviour for bytes >= 0x80 on
// platforms where char is signed.
static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
                             WordList *keywordlists[], Accessor &styler) {

	// Lexer for bash often has to backtrack to start of current style to determine
	// which characters are being used as quotes, how deeply nested is the
	// start position and what the termination string is for here documents

	WordList &keywords = *keywordlists[0];

	// Bookkeeping for the here-document currently being lexed.
	class HereDocCls {
	public:
		int State;		// 0: '<<' encountered
		// 1: collect the delimiter
		// 2: here doc text (lines after the delimiter)
		char Quote;		// the char after '<<'
		bool Quoted;		// true if Quote in ('\'','"','`')
		bool Indent;		// indented delimiter (for <<-)
		int DelimiterLength;	// strlen(Delimiter)
		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf
		HereDocCls() {
			State = 0;
            Quote = 0;
            Quoted = false;
            Indent = 0;
			DelimiterLength = 0;
			Delimiter = new char[HERE_DELIM_MAX];
			Delimiter[0] = '\0';
		}
		~HereDocCls() {
			delete []Delimiter;
		}
	};
	HereDocCls HereDoc;

	// Bookkeeping for the quoted construct currently being lexed:
	// Up/Down are the opening and closing characters, Count is the current
	// nesting depth and Rep the number of quoted sections still expected.
	class QuoteCls {
		public:
		int  Rep;
		int  Count;
		char Up;
		char Down;
		QuoteCls() {
			this->New(1);
		}
		void New(int r) {
			Rep   = r;
			Count = 0;
			Up    = '\0';
			Down  = '\0';
		}
		void Open(char u) {
			Count++;
			Up    = u;
			Down  = opposite(Up);
		}
	};
	QuoteCls Quote;

	int state = initStyle;
	int numBase = 0;
	unsigned int lengthDoc = startPos + length;

	// If in a long distance lexical state, seek to the beginning to find quote characters
	// Bash strings can be multi-line with embedded newlines, so backtrack.
	// Bash numbers have additional state during lexing, so backtrack too.
	if (state == SCE_SH_HERE_Q) {
		// Walk back to the here-doc delimiter, then to the start of its line.
		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
			startPos--;
		}
		startPos = styler.LineStart(styler.GetLine(startPos));
		state = styler.StyleAt(startPos - 1);
	}
	if (state == SCE_SH_STRING
	 || state == SCE_SH_BACKTICKS
	 || state == SCE_SH_CHARACTER
	 || state == SCE_SH_NUMBER
	 || state == SCE_SH_IDENTIFIER
	 || state == SCE_SH_COMMENTLINE
	) {
		// Walk back to the first character carrying this style and re-lex from there.
		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
			startPos--;
		}
		state = SCE_SH_DEFAULT;
	}

	styler.StartAt(startPos);
	char chPrev = styler.SafeGetCharAt(startPos - 1);
	if (startPos == 0)
		chPrev = '\n';
	char chNext = styler[startPos];
	styler.StartSegment(startPos);

	for (unsigned int i = startPos; i < lengthDoc; i++) {
		char ch = chNext;
		// if the current character is not consumed due to the completion of an
		// earlier style, lexing can be restarted via a simple goto
	restartLexer:
		chNext = styler.SafeGetCharAt(i + 1);
		char chNext2 = styler.SafeGetCharAt(i + 2);

		// When styler.IsLeadByte() reports a lead byte, skip it together with
		// the byte that follows it.
		if (styler.IsLeadByte(ch)) {
			chNext = styler.SafeGetCharAt(i + 2);
			chPrev = ' ';
			i += 1;
			continue;
		}

		if ((chPrev == '\r' && ch == '\n')) {	// skip on DOS/Windows
			styler.ColourTo(i, state);
			chPrev = ch;
			continue;
		}

		if (HereDoc.State == 1 && isEOLChar(ch)) {
			// Begin of here-doc (the line after the here-doc delimiter):
			// Lexically, the here-doc starts from the next line after the <<, but the
			// first line of here-doc seems to follow the style of the last EOL sequence
			HereDoc.State = 2;
			if (HereDoc.Quoted) {
				if (state == SCE_SH_HERE_DELIM) {
					// Missing quote at end of string! We are stricter than bash.
					// Colour here-doc anyway while marking this bit as an error.
					state = SCE_SH_ERROR;
				}
				styler.ColourTo(i - 1, state);
				// HereDoc.Quote always == '\''
				state = SCE_SH_HERE_Q;
			} else {
				styler.ColourTo(i - 1, state);
				// always switch
				state = SCE_SH_HERE_Q;
			}
		}

		if (state == SCE_SH_DEFAULT) {
			if (ch == '\\') {	// escaped character
				if (i < lengthDoc - 1)
					i++;
				ch = chNext;
				chNext = chNext2;
				styler.ColourTo(i, SCE_SH_IDENTIFIER);
			} else if (isdigit(ch)) {
				state = SCE_SH_NUMBER;
				numBase = BASH_BASE_DECIMAL;
				if (ch == '0') {	// hex,octal
					if (chNext == 'x' || chNext == 'X') {
						numBase = BASH_BASE_HEX;
						i++;
						ch = chNext;
						chNext = chNext2;
					} else if (isdigit(chNext)) {
#ifdef PEDANTIC_OCTAL
						numBase = BASH_BASE_OCTAL;
#else
						numBase = BASH_BASE_HEX;
#endif
					}
				}
			} else if (iswordstart(ch)) {
				state = SCE_SH_WORD;
				if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
					// We need that if length of word == 1!
					// This test is copied from the SCE_SH_WORD handler.
					classifyWordBash(styler.GetStartSegment(), i, keywords, styler);
					state = SCE_SH_DEFAULT;
				}
			} else if (ch == '#') {
				state = SCE_SH_COMMENTLINE;
			} else if (ch == '\"') {
				state = SCE_SH_STRING;
				Quote.New(1);
				Quote.Open(ch);
			} else if (ch == '\'') {
				state = SCE_SH_CHARACTER;
				Quote.New(1);
				Quote.Open(ch);
			} else if (ch == '`') {
				state = SCE_SH_BACKTICKS;
				Quote.New(1);
				Quote.Open(ch);
			} else if (ch == '$') {
				// '$' introduces ${...}, $'...', $"...", $((, $(...) or a plain
				// variable; the branches share the startQuote/skipChar tails below.
				if (chNext == '{') {
					state = SCE_SH_PARAM;
					goto startQuote;
				} else if (chNext == '\'') {
					state = SCE_SH_CHARACTER;
					goto startQuote;
				} else if (chNext == '"') {
					state = SCE_SH_STRING;
					goto startQuote;
				} else if (chNext == '(' && chNext2 == '(') {
					styler.ColourTo(i, SCE_SH_OPERATOR);
					state = SCE_SH_DEFAULT;
					goto skipChar;
				} else if (chNext == '(' || chNext == '`') {
					state = SCE_SH_BACKTICKS;
				startQuote:
					Quote.New(1);
					Quote.Open(chNext);
					goto skipChar;
				} else {
					state = SCE_SH_SCALAR;
				skipChar:
					i++;
					ch = chNext;
					chNext = chNext2;
				}
			} else if (ch == '*') {
				if (chNext == '*') {	// exponentiation
					i++;
					ch = chNext;
					chNext = chNext2;
				}
				styler.ColourTo(i, SCE_SH_OPERATOR);
			} else if (ch == '<' && chNext == '<') {
				state = SCE_SH_HERE_DELIM;
				HereDoc.State = 0;
				HereDoc.Indent = false;
			} else if (ch == '-'	// file test operators
			           && isSingleCharOp(chNext)
			           && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))
			           && isspace(chPrev)) {
				styler.ColourTo(i + 1, SCE_SH_WORD);
				state = SCE_SH_DEFAULT;
				i++;
				ch = chNext;
				chNext = chNext2;
			} else if (isBashOperator(ch)) {
				styler.ColourTo(i, SCE_SH_OPERATOR);
			} else {
				// keep colouring defaults to make restart easier
				styler.ColourTo(i, SCE_SH_DEFAULT);
			}
		} else if (state == SCE_SH_NUMBER) {
			int digit = translateBashDigit(ch);
			if (numBase == BASH_BASE_DECIMAL) {
				if (ch == '#') {
					// The digits seen so far were a base prefix (NN#).
					numBase = getBashNumberBase(styler.GetStartSegment(), i - 1, styler);
					if (numBase == BASH_BASE_ERROR)	// take the rest as comment
						goto numAtEnd;
				} else if (!isdigit(ch))
					goto numAtEnd;
			} else if (numBase == BASH_BASE_HEX) {
				// translateBashDigit maps 'A'-'F' to 36-41.
				if ((digit < 16) || (digit >= 36 && digit <= 41)) {
					// hex digit 0-9a-fA-F
				} else
					goto numAtEnd;
#ifdef PEDANTIC_OCTAL
			} else if (numBase == BASH_BASE_OCTAL ||
				   numBase == BASH_BASE_OCTAL_ERROR) {
				if (digit > 7) {
					if (digit <= 9) {
                                                numBase = BASH_BASE_OCTAL_ERROR;
					} else
						goto numAtEnd;
				}
#endif
			} else if (numBase == BASH_BASE_ERROR) {
				if (digit > 9)
					goto numAtEnd;
			} else {	// DD#DDDD number style handling
				if (digit != BASH_BASE_ERROR) {
					if (numBase <= 36) {
						// case-insensitive if base<=36
						// (upper-case values 36-61 are folded onto 10-35)
						if (digit >= 36) digit -= 26;
					}
					if (digit >= numBase) {
						if (digit <= 9) {
							numBase = BASH_BASE_ERROR;
						} else
							goto numAtEnd;
					}
				} else {
			numAtEnd:
					// End of the number: colour it (as an error if one was
					// detected) and re-examine the current character.
					if (numBase == BASH_BASE_ERROR
#ifdef PEDANTIC_OCTAL
					    || numBase == BASH_BASE_OCTAL_ERROR
#endif
                                           )
						state = SCE_SH_ERROR;
					styler.ColourTo(i - 1, state);
					state = SCE_SH_DEFAULT;
					goto restartLexer;
				}
			}
		} else if (state == SCE_SH_WORD) {
			if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
				// "." never used in Bash variable names
				// but used in file names
				classifyWordBash(styler.GetStartSegment(), i, keywords, styler);
				state = SCE_SH_DEFAULT;
				ch = ' ';
			}
		} else if (state == SCE_SH_IDENTIFIER) {
			if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
				styler.ColourTo(i, SCE_SH_IDENTIFIER);
				state = SCE_SH_DEFAULT;
				ch = ' ';
			}
		} else {
			if (state == SCE_SH_COMMENTLINE) {
				if (ch == '\\' && isEOLChar(chNext)) {
					// comment continuation
					if (chNext == '\r' && chNext2 == '\n') {
						i += 2;
						ch = styler.SafeGetCharAt(i);
						chNext = styler.SafeGetCharAt(i + 1);
					} else {
						i++;
						ch = chNext;
						chNext = chNext2;
					}
				} else if (isEOLChar(ch)) {
					styler.ColourTo(i - 1, state);
					state = SCE_SH_DEFAULT;
					goto restartLexer;
				} else if (isEOLChar(chNext)) {
					styler.ColourTo(i, state);
					state = SCE_SH_DEFAULT;
				}
			} else if (state == SCE_SH_HERE_DELIM) {
				//
				// From Bash info:
				// ---------------
				// Specifier format is: <<[-]WORD
				// Optional '-' is for removal of leading tabs from here-doc.
				// Whitespace acceptable after <<[-] operator
				//
				if (HereDoc.State == 0) { // '<<' encountered
					HereDoc.State = 1;
					HereDoc.Quote = chNext;
					HereDoc.Quoted = false;
					HereDoc.DelimiterLength = 0;
					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
					if (chNext == '\'' || chNext == '\"') {	// a quoted here-doc delimiter (' or ")
						i++;
						ch = chNext;
						chNext = chNext2;
						HereDoc.Quoted = true;
					} else if (!HereDoc.Indent && chNext == '-') {	// <<- indent case
						HereDoc.Indent = true;
						HereDoc.State = 0;
					} else if (isalpha(chNext) || chNext == '_' || chNext == '\\'
						|| chNext == '-' || chNext == '+' || chNext == '!') {
						// an unquoted here-doc delimiter, no special handling
                        // TODO check what exactly bash considers part of the delim
					} else if (chNext == '<') {	// HERE string <<<
						i++;
						ch = chNext;
						chNext = chNext2;
						styler.ColourTo(i, SCE_SH_HERE_DELIM);
						state = SCE_SH_DEFAULT;
						HereDoc.State = 0;
					} else if (isspacechar(chNext)) {
						// eat whitespace
						HereDoc.State = 0;
					} else if (isdigit(chNext) || chNext == '=' || chNext == '$') {
						// left shift << or <<= operator cases
						styler.ColourTo(i, SCE_SH_OPERATOR);
						state = SCE_SH_DEFAULT;
						HereDoc.State = 0;
					} else {
						// symbols terminates; deprecated zero-length delimiter
					}
				} else if (HereDoc.State == 1) { // collect the delimiter
					if (HereDoc.Quoted) { // a quoted here-doc delimiter
						if (ch == HereDoc.Quote) { // closing quote => end of delimiter
							styler.ColourTo(i, state);
							state = SCE_SH_DEFAULT;
						} else {
							if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
								i++;
								ch = chNext;
								chNext = chNext2;
							}
							HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
							HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
						}
					} else { // an unquoted here-doc delimiter
						if (isalnum(ch) || ch == '_' || ch == '-' || ch == '+' || ch == '!') {
							HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
							HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
						} else if (ch == '\\') {
							// skip escape prefix
						} else {
							styler.ColourTo(i - 1, state);
							state = SCE_SH_DEFAULT;
							goto restartLexer;
						}
					}
					// Delimiter buffer is full: flag the rest as an error.
					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
						styler.ColourTo(i - 1, state);
						state = SCE_SH_ERROR;
						goto restartLexer;
					}
				}
			} else if (HereDoc.State == 2) {
				// state == SCE_SH_HERE_Q
				if (isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
					if (!HereDoc.Indent && isEOLChar(chPrev)) {
					endHereDoc:
						// standard HERE delimiter
						i += HereDoc.DelimiterLength;
						chPrev = styler.SafeGetCharAt(i - 1);
						ch = styler.SafeGetCharAt(i);
						if (isEOLChar(ch)) {
							styler.ColourTo(i - 1, state);
							state = SCE_SH_DEFAULT;
							HereDoc.State = 0;
							goto restartLexer;
						}
						chNext = styler.SafeGetCharAt(i + 1);
					} else if (HereDoc.Indent) {
						// indented HERE delimiter
						// Scan backwards from the match: only whitespace may
						// separate it from the preceding end of line.
						unsigned int bk = (i > 0)? i - 1: 0;
						while (i > 0) {
							ch = styler.SafeGetCharAt(bk--);
							if (isEOLChar(ch)) {
								goto endHereDoc;
							} else if (!isspacechar(ch)) {
								break;	// got leading non-whitespace
							}
						}
					}
				}
			} else if (state == SCE_SH_SCALAR) {	// variable names
				if (isEndVar(ch)) {
					if ((state == SCE_SH_SCALAR)
					    && i == (styler.GetStartSegment() + 1)) {
						// Special variable: $(, $_ etc.
						styler.ColourTo(i, state);
						state = SCE_SH_DEFAULT;
					} else {
						styler.ColourTo(i - 1, state);
						state = SCE_SH_DEFAULT;
						goto restartLexer;
					}
				}
			} else if (state == SCE_SH_STRING
				|| state == SCE_SH_CHARACTER
				|| state == SCE_SH_BACKTICKS
				|| state == SCE_SH_PARAM
				) {
				if (!Quote.Down && !isspacechar(ch)) {
					Quote.Open(ch);
				} else if (ch == '\\' && Quote.Up != '\\') {
					// Backslash escape: skip the escaped character.
					i++;
					ch = chNext;
					chNext = styler.SafeGetCharAt(i + 1);
				} else if (ch == Quote.Down) {
					Quote.Count--;
					if (Quote.Count == 0) {
						Quote.Rep--;
						if (Quote.Rep <= 0) {
							styler.ColourTo(i, state);
							state = SCE_SH_DEFAULT;
							ch = ' ';
						}
						if (Quote.Up == Quote.Down) {
							Quote.Count++;
						}
					}
				} else if (ch == Quote.Up) {
					Quote.Count++;
				}
			}
		}
		// Stop lexing once an error state is entered; the final ColourTo below
		// then applies the error style up to the end of the range.
		if (state == SCE_SH_ERROR) {
			break;
		}
		chPrev = ch;
	}
	styler.ColourTo(lengthDoc - 1, state);
}

// True when the first character on the line that is neither a space nor a
// tab is '#'.  The last character before the start of the next line is not
// examined, and a line with nothing but blanks is not a comment line.
static bool IsCommentLine(int line, Accessor &styler) {
	const int lineBegin = styler.LineStart(line);
	const int lastChar = styler.LineStart(line + 1) - 1;
	int cursor = lineBegin;
	while (cursor < lastChar) {
		const char c = styler[cursor];
		if (c == '#')
			return true;
		if (c != ' ' && c != '\t')
			return false;
		cursor++;
	}
	return false;
}

// Compute fold levels for the range [startPos, startPos + length).
// A '{' styled as an operator opens a fold level and a '}' closes one.
// When the "fold.comment" property is set, runs of consecutive comment
// lines are folded too.  When "fold.compact" is set (the default), lines
// without visible characters receive the white-space flag.
static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
                            Accessor &styler) {
	const bool commentFolding = styler.GetPropertyInt("fold.comment") != 0;
	const bool compactFolding = styler.GetPropertyInt("fold.compact", 1) != 0;
	const unsigned int stopPos = startPos + length;
	int line = styler.GetLine(startPos);
	int levelAtLineStart = styler.LevelAt(line) & SC_FOLDLEVELNUMBERMASK;
	int level = levelAtLineStart;
	int nonBlankCount = 0;
	char lookAheadChar = styler[startPos];
	int lookAheadStyle = styler.StyleAt(startPos);

	for (unsigned int pos = startPos; pos < stopPos; pos++) {
		const char current = lookAheadChar;
		lookAheadChar = styler.SafeGetCharAt(pos + 1);
		const int currentStyle = lookAheadStyle;
		lookAheadStyle = styler.StyleAt(pos + 1);
		// A lone '\r', or a '\n', terminates the line.
		const bool lineEnds = (current == '\n') || (current == '\r' && lookAheadChar != '\n');

		// Comment folding: the first line of a comment run opens a level,
		// the last line of the run closes it.
		if (commentFolding && lineEnds && IsCommentLine(line, styler)) {
			if (IsCommentLine(line - 1, styler)) {
				if (!IsCommentLine(line + 1, styler))
					level--;
			} else if (IsCommentLine(line + 1, styler)) {
				level++;
			}
		}

		// Brace folding.
		if (currentStyle == SCE_SH_OPERATOR) {
			switch (current) {
			case '{':
				level++;
				break;
			case '}':
				level--;
				break;
			default:
				break;
			}
		}

		if (lineEnds) {
			int levelWithFlags = levelAtLineStart;
			if (compactFolding && nonBlankCount == 0)
				levelWithFlags |= SC_FOLDLEVELWHITEFLAG;
			if (nonBlankCount > 0 && level > levelAtLineStart)
				levelWithFlags |= SC_FOLDLEVELHEADERFLAG;
			if (styler.LevelAt(line) != levelWithFlags)
				styler.SetLevel(line, levelWithFlags);
			line++;
			levelAtLineStart = level;
			nonBlankCount = 0;
		}

		if (!isspacechar(current))
			nonBlankCount++;
	}

	// Store the level number of the following line but keep whatever flags it
	// already has; they are filled in later.
	const int keptFlags = styler.LevelAt(line) & ~SC_FOLDLEVELNUMBERMASK;
	styler.SetLevel(line, levelAtLineStart | keptFlags);
}

// Descriptions of the keyword lists used by this lexer, terminated by a
// null entry.  The single entry corresponds to keywordlists[0], the list
// ColouriseBashDoc uses to recognise keywords.
static const char * const bashWordListDesc[] = {
	"Keywords",
	0
};

// Lexer module object tying the SCLEX_BASH identifier and the name "bash" to
// the colouring and folding functions and the word list descriptions above.
LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);
Show on old repository browser