// mxwcore-legion/dep/g3dlite/source/TextInput.cpp
//
// 1347 lines
// 37 KiB
// C++

/**
\file G3D/source/TextInput.cpp
\author Morgan McGuire, http://graphics.cs.williams.edu
\cite Based on a lexer written by Aaron Orenstein.
\created 2001-11-27
\edited 2012-07-22
*/
#include "G3D/fileutils.h"
#include "G3D/TextInput.h"
#include "G3D/BinaryInput.h"
#include "G3D/FileSystem.h"
#include "G3D/stringutils.h"
#ifdef _MSC_VER
# pragma warning (push)
#endif
namespace G3D {
/** Reads tokens until one is found that is neither a COMMENT nor a NEWLINE,
    and returns that token. */
Token TextInput::readSignificant() {
    for (;;) {
        Token candidate = read();
        const bool skippable =
            (candidate.type() == Token::COMMENT) ||
            (candidate.type() == Token::NEWLINE);
        if (! skippable) {
            return candidate;
        }
    }
}
/** Numeric value of this token: the parsed text for NUMBER tokens, 0.0 for
    every other token type. */
double Token::number() const {
    return (_type == NUMBER) ? TextInput::parseNumber(_string) : 0.0;
}
/** Returns true exactly when the text, compared case-insensitively, is "true". */
bool TextInput::parseBoolean(const std::string& _string) {
    const std::string lowered = toLower(_string);
    return lowered == "true";
}
/**
 Converts the text of a numeric token to a double.

 Recognizes, case-insensitively, the MSVC-style and plain spellings of NaN
 and +/- infinity.  Text starting with "0x" is read as a hexadecimal
 integer; everything else is handed to sscanf's "%lg" conversion.

 \param _string Text to convert.
 \return The parsed value, or 0 when sscanf cannot convert the text.
*/
double TextInput::parseNumber(const std::string& _string) {
    // The special-value comparisons are made against the lowercased text,
    // so every literal below must itself be lowercase to ever match.
    const std::string s = toLower(_string);

    if (s == "-1.#ind00" || s == "-1.#ind" || s == "nan") {
        return nan();
    }

    if (s == "1.#inf00" || s == "1.#inf" || s == "inf" || s == "+inf" || s == "infinity") {
        return inf();
    }

    if (s == "-1.#inf00" || s == "-1.#inf" || s == "-inf" || s == "-infinity") {
        return -inf();
    }

    // Initialized so that a failed conversion yields 0 rather than an
    // indeterminate value.
    double n = 0.0;
    if ((_string.length() > 2) &&
        (_string[0] == '0') &&
        (_string[1] == 'x')) {
        // Hex
        uint32 i = 0;
        sscanf(_string.c_str(), "%x", &i);
        n = i;
    } else {
        sscanf(_string.c_str(), "%lg", &n);
    }

    return n;
}
/**
 Default tokenizer settings.

 Every option is given its default in the initializer list below; the
 constructor body then registers "true" and "false" as the default boolean
 symbols.
*/
TextInput::Settings::Settings () :
cppBlockComments(true),
cppLineComments(true),
otherLineComments(true),
escapeSequencesInStrings(true),
// '\0' means "no extra comment character configured"
otherCommentCharacter('\0'),
otherCommentCharacter2('\0'),
generateCommentTokens(false),
generateNewlineTokens(false),
signedNumbers(true),
singleQuotedStrings(true),
singleQuoteCharacter('\''),
sourceFileName(),
startingLineNumberOffset(0),
msvcFloatSpecials(true),
simpleFloatSpecials(true),
proofSymbols(false),
caseSensitive(true)
{
// Symbols that are tokenized as booleans instead of plain symbols
trueSymbols.insert("true");
falseSymbols.insert("false");
}
/** Returns a copy of the next token without consuming it.  When nothing is
    queued, one token is lexed from the buffer and queued first. */
Token TextInput::peek() {
    const bool nothingQueued = (stack.size() == 0);
    if (nothingQueued) {
        Token lookahead;
        nextToken(lookahead);
        push(lookahead);
    }

    return stack.front();
}
int TextInput::peekLineNumber() {
return peek().line();
}
int TextInput::peekCharacterNumber() {
return peek().character();
}
/** Consumes and returns the next token (by-value convenience wrapper around
    read(Token&)). */
Token TextInput::read() {
    Token result;
    read(result);
    return result;
}
/** Consumes the next token into \a t: a queued (pushed-back or peeked) token
    if there is one, otherwise a token freshly lexed from the buffer. */
void TextInput::read(Token& t) {
    if (stack.size() == 0) {
        // Nothing queued: lex directly into the caller's token.
        nextToken(t);
        return;
    }

    t = stack.front();
    stack.pop_front();
}
/**
 Reads raw characters from the buffer, bypassing tokenization, up to (but not
 including) the first occurrence of either delimiter, or to the end of the
 buffer.

 If tokens are queued (peeked or pushed back), the queue is discarded and
 reading restarts at the position recorded on the next unread token.  This
 only works if the queue reflects the real file order.

 The line counter is not advanced by this function; only the byte offset and
 the character number are.

 \param delimiter1 First character that stops the read.
 \param delimiter2 Second character that stops the read.
 \return The characters consumed; the delimiter itself is left unread.
*/
std::string TextInput::readUntilDelimiterAsString(const char delimiter1, const char delimiter2) {
    std::string s;

    if (stack.size() > 0) {
        // Need to back up.  push() inserts at the front and read() pops from
        // the front, so the front element is the earliest unread token and
        // its position is where the unread input begins.
        Token t = stack.front();
        stack.clear();
        currentCharOffset = (int)t.bytePosition();
        lineNumber = t.line();
        charNumber = t.character();
    }

    // Read until delimiter or eof
    while (currentCharOffset < buffer.size()) {
        const char c = buffer[currentCharOffset];
        if ((c == delimiter1) || (c == delimiter2)) {
            // Done
            break;
        } else {
            s += c;
            ++currentCharOffset;
            ++charNumber;
        }
    }

    return s;
}
/** Reads raw characters up to (not including) the next '\r' or '\n'. */
std::string TextInput::readUntilNewlineAsString() {
    const char carriageReturn = '\r';
    const char lineFeed       = '\n';
    return readUntilDelimiterAsString(carriageReturn, lineFeed);
}
/** Replaces every member of \a set with its upper-case form. */
static void toUpper(Set<std::string>& set) {
    // Snapshot the members first; the set is emptied and rebuilt below.
    Array<std::string> members;
    set.getMembers(members);
    set.clear();

    int index = 0;
    while (index < members.size()) {
        set.insert(toUpper(members[index]));
        ++index;
    }
}
/** Resets the read position to the start of the buffer and, for
    case-insensitive settings, upper-cases the boolean symbol sets. */
void TextInput::init() {
    lineNumber        = options.startingLineNumberOffset + 1;
    charNumber        = 1;
    currentCharOffset = 0;

    if (options.caseSensitive) {
        return;
    }

    // Case-insensitive matching compares against all-uppercase symbols,
    // so normalize both sets once up front.
    toUpper(options.trueSymbols);
    toUpper(options.falseSymbols);
}
/** Pushes \a t back onto the token queue.  It is inserted at the front, which
    is where peek() and read() take their next token from. */
void TextInput::push(const Token& t) {
stack.push_front(t);
}
bool TextInput::hasMore() {
return (peek()._type != Token::END);
}
/**
 Consumes one input character and returns it, or EOF at the end of the buffer.

 lineNumber and charNumber are updated to describe the location of the *next*
 character to be read.  "\r", "\n" and "\r\n" each count as a single line
 break; for "\r\n" both bytes are consumed and '\n' is returned.
*/
int TextInput::eatInputChar() {
    // Don't go off the end
    if (currentCharOffset >= buffer.length()) {
        return EOF;
    }

    unsigned char consumed = buffer[currentCharOffset];
    ++currentCharOffset;

    const bool isCarriageReturn = (consumed == '\r');
    if (isCarriageReturn || (consumed == '\n')) {
        ++lineNumber;
        charNumber = 1;

        // Fold a "\r\n" pair into a single line break.
        if (isCarriageReturn && (currentCharOffset < buffer.length())) {
            const unsigned char following = buffer[currentCharOffset];
            if (following == '\n') {
                consumed = following;
                ++currentCharOffset;
            }
        }
    } else {
        ++charNumber;
    }

    return consumed;
}
/** Returns the character \a distance positions ahead of the read position
    without consuming anything, or EOF if that position is past the end. */
int TextInput::peekInputChar(int distance) {
    const bool pastEnd = (currentCharOffset + distance) >= buffer.length();
    if (pastEnd) {
        return EOF;
    }

    // Widen through unsigned char so the result is never negative.
    const unsigned char ahead = buffer[currentCharOffset + distance];
    return ahead;
}
/**
 Lexes the next token from the character buffer into \a t.

 Leading whitespace is skipped.  Newlines and comments are skipped too unless
 options.generateNewlineTokens / options.generateCommentTokens ask for them to
 be returned as tokens.  At the end of input \a t is an END token.

 Line comments introduced by options.otherCommentCharacter or
 options.otherCommentCharacter2 are recognized only when
 options.otherLineComments is set; each character is checked independently.

 \param t Receives the token.  Its position fields describe where the token
          starts; its string is reset before lexing begins.
 \throws BadMSVCSpecial on a malformed MSVC floating-point special
         (e.g. "1.#IN?").
 \throws std::string (built by format()) on a character that starts no
         known token.
*/
void TextInput::nextToken(Token& t) {

    t._bytePosition = currentCharOffset;
    t._line         = lineNumber;
    t._character    = charNumber;
    t._type         = Token::END;
    t._extendedType = Token::END_TYPE;

    // Start from an empty string.  The number path below keeps a leading "-"
    // it finds in t._string, so text left over from a caller's previous use
    // of this Token must not survive into the new token.
    t._string       = "";

    int c = peekInputChar();
    if (c == EOF) {
        return;
    }

    // loop through white space, newlines and comments
    // found before other tokens
    bool whitespaceDone = false;
    while (! whitespaceDone) {
        whitespaceDone = true;

        // generate newlines tokens for '\n' and '\r' and '\r\n'
        while (isWhiteSpace(c)) {
            if (options.generateNewlineTokens && isNewline(c)) {
                t._type         = Token::NEWLINE;
                t._extendedType = Token::NEWLINE_TYPE;
                t._bytePosition = currentCharOffset;
                t._line         = lineNumber;
                t._character    = charNumber;
                t._string       = c;

                int c2 = peekInputChar(1);
                if (c == '\r' && c2 == '\n') {
                    t._string  += c2;
                }

                // eatInputChar consumes both bytes of a "\r\n" pair
                eatInputChar();
                return;
            } else {
                // Consume the single whitespace
                c = eatAndPeekInputChar();
            }
        }

        // update line and character number to include discarded whitespace
        t._line         = lineNumber;
        t._character    = charNumber;
        t._bytePosition = currentCharOffset;

        if (isDigit(c)) {
            // This is an unsigned number.  Jump ahead for fast number reading.
            goto numLabel;
        }

        int c2 = peekInputChar(1);

        if ((c == '-') && isDigit(c2) && options.signedNumbers) {
            // This is a simple number.  Jump ahead for fast number reading.
            // We treat this case specially because large (i.e., slow) files
            // are usually large because they are full of numbers.
            t._string = "-";
            c = c2;

            // Consume the minus sign
            eatInputChar();
            goto numLabel;
        }

        // parse comments and generate tokens if enabled
        std::string commentString;

        // check for line comments first
        bool isLineComment = false;
        if (options.cppLineComments && (c == '/' && c2 == '/')) {
            // set start of line comment and eat markers
            isLineComment = true;
            eatInputChar();
            eatInputChar();
        } else if (options.otherLineComments &&
                   (options.otherCommentCharacter != '\0') &&
                   (c == options.otherCommentCharacter)) {
            // set start of line comment and eat markers
            isLineComment = true;
            eatInputChar();
        } else if (options.otherLineComments &&
                   (options.otherCommentCharacter2 != '\0') &&
                   (c == options.otherCommentCharacter2)) {
            // set start of line comment and eat markers
            isLineComment = true;
            eatInputChar();
        }

        if (isLineComment) {

            // consume line comment to newline or EOF
            c = peekInputChar();
            while (! isNewline(c) && c != EOF) {
                // build comment string for token
                commentString += c;

                c = eatAndPeekInputChar();
            }

            if (options.generateCommentTokens) {
                t._type         = Token::COMMENT;
                t._extendedType = Token::LINE_COMMENT_TYPE;
                t._string       = commentString;
                return;
            } else {
                // There is whitespace after the comment (in particular, the
                // newline that terminates the comment).  There might also be
                // whitespace at the start of the next line.
                whitespaceDone = false;
            }

        } else if (options.cppBlockComments && (c == '/' && c2 == '*')) {
            // consume block comment to end-marker or EOF

            // consume both start-comment chars, can't let the trailing one
            // help close the comment.
            eatInputChar();
            eatInputChar();

            // c is the next character we'll read, c2 is the one after *that*
            c = peekInputChar();
            c2 = peekInputChar(1);
            while (! ((c == '*') && (c2 == '/')) && (c != EOF)) {
                commentString += c;

                // Eat input char may consume more than one character if there is a newline
                eatInputChar();

                c = peekInputChar();
                c2 = peekInputChar(1);
            }
            eatInputChar();      // eat closing '*'
            eatInputChar();      // eat closing '/'

            c = peekInputChar();

            if (options.generateCommentTokens) {
                t._type         = Token::COMMENT;
                t._extendedType = Token::BLOCK_COMMENT_TYPE;
                t._string       = commentString;
                return;
            } else {
                // There is whitespace after the comment (in particular, the
                // newline that terminates the comment).  There might also be
                // whitespace at the start of the next line.
                whitespaceDone = false;
            }
        }

    }  // while (! whitespaceDone)

    t._line         = lineNumber;
    t._character    = charNumber;
    t._bytePosition = currentCharOffset;

    // handle EOF
    if (c == EOF) {
        return;
    }

    // Extended ASCII parses as itself, except for EOF
    if (c > 127 && c < 255) {
        t._type         = Token::SYMBOL;
        t._extendedType = Token::SYMBOL_TYPE;
        t._string       = c;
        c = eatAndPeekInputChar();
        // The token is complete.  Without this return the code below would
        // keep lexing from the following character and overwrite it.
        return;
    }

    // Perform appropriate setup for a symbol (including setting up the token
    // string to start with c), eat the input character, and overwrite
    // 'c' with the peeked next input character.
#define SETUP_SYMBOL(c)                         \
    {                                           \
        t._type = Token::SYMBOL;                \
        t._extendedType = Token::SYMBOL_TYPE;   \
        t._string = c;                          \
        c = eatAndPeekInputChar();              \
    }

    switch (c) {

    case '@':                   // Simple symbols -> just themselves.
    case '(':
    case ')':
    case ',':
    case ';':
    case '{':
    case '}':
    case '[':
    case ']':
    case '#':
    case '$':
    case '?':
    case '%':
        SETUP_SYMBOL(c);
        return;

    case '-':                   // negative number, -, --, -=, or ->
        SETUP_SYMBOL(c);

        switch (c) {
        case '>':               // ->
        case '-':               // --
        case '=':               // -=
            t._string += c;
            eatInputChar();
            return;
        }

        if (options.signedNumbers) {
            if (isDigit(c) || (c == '.' && isDigit(peekInputChar(1)))) {
                // Negative number.  'c' is still the first digit, and is
                // the next input char.
                goto numLabel;
            } else {
                char terminal = peekInputChar(3);
                if (options.simpleFloatSpecials && (c == 'i') && (peekInputChar(1) == 'n') && (peekInputChar(2) == 'f') &&
                    ! isLetter(terminal) && (terminal != '_')) {
                    // negative infinity
                    t._type = Token::NUMBER;
                    t._extendedType = Token::FLOATING_POINT_TYPE;
                    t._string = "-inf";
                    eatInputChar(); // i
                    eatInputChar(); // n
                    eatInputChar(); // f
                    return;
                }
            }
        }

        // plain -
        return;

    case '+':                   // positive number, +, ++, or +=
        SETUP_SYMBOL(c);

        switch (c) {
        case '+':               // ++
        case '=':               // +=
            t._string += c;
            eatInputChar();
            return;
        }

        if (options.signedNumbers) {
            if (isDigit(c) || (c == '.' && isDigit(peekInputChar(1)))) {
                // Positive number.  'c' is still the first digit, and is
                // the next input char.
                goto numLabel;
            } else {
                char terminal = peekInputChar(3);
                if (options.simpleFloatSpecials && (c == 'i') && (peekInputChar(1) == 'n') && (peekInputChar(2) == 'f') &&
                    ! isLetter(terminal) && (terminal != '_')) {
                    // positive infinity
                    t._type = Token::NUMBER;
                    t._extendedType = Token::FLOATING_POINT_TYPE;
                    t._string = "+inf";
                    eatInputChar(); // i
                    eatInputChar(); // n
                    eatInputChar(); // f
                    return;
                }
            }
        }

        // plain +
        return;

    case ':':                   // : or :: or ::> or ::= or := or :>
        SETUP_SYMBOL(c);

        if (c == ':') {
            t._string += c;
            eatInputChar();

            if (options.proofSymbols) {
                c = peekInputChar(0);

                if ((c == '>') || (c == '=')) {
                    t._string += c;
                    eatInputChar();
                }
            }
        } else if (options.proofSymbols && (c == '=' || c == '>')) {
            t._string += c;
            eatInputChar();
        }
        return;

    case '=':                   // = or == or =>
        SETUP_SYMBOL(c);

        if (c == '=') {
            t._string += c;
            eatInputChar();
        } else if (options.proofSymbols && (c == '>')) {
            t._string += c;
            eatInputChar();
        }
        return;

    case '*':                   // * or *=
    case '/':                   // / or /=
    case '!':                   // ! or !=
    case '~':                   // ~ or ~=
    case '^':                   // ^ or ^=
        SETUP_SYMBOL(c);

        if (c == '=') {
            t._string += c;
            eatInputChar();
        }
        return;

    case '>':                   // >, >>, or >=
    case '<':                   // <, <<, or <= or <- or <:
    case '|':                   // |, ||, or |= or |-
    case '&':                   // &, &&, or &=
        {
            int orig_c = c;
            SETUP_SYMBOL(c);

            if ((c == '=') || (orig_c == c)) {
                t._string += c;
                eatInputChar();
            } else if (options.proofSymbols) {
                if ((orig_c == '<') && (c == '-')) {
                    t._string += c;
                    eatInputChar();
                } else if ((orig_c == '|') && (c == '-')) {
                    t._string += c;
                    eatInputChar();
                } else if ((orig_c == '<') && (c == ':')) {
                    t._string += c;

                    c = eatAndPeekInputChar();

                    if (c == ':') {
                        t._string += c;
                        eatInputChar();
                    }
                }
            }
        }
        return;

    case '\\':                // backslash or escaped comment char.
        SETUP_SYMBOL(c);

        if ((options.otherCommentCharacter != '\0'
             && c == options.otherCommentCharacter)
            || (options.otherCommentCharacter2 != '\0'
                && c == options.otherCommentCharacter2)) {

            // escaped comment character.  Return the raw comment
            // char (no backslash).

            t._string = c;
            eatInputChar();
            return;
        }
        return;

    case '.':                   // number, ., .., or ...
        if (isDigit(peekInputChar(1))) {
            // We're parsing a float that began without a leading zero
            goto numLabel;
        }

        SETUP_SYMBOL(c);

        if (c == '.') {         // .. or ...
            t._string += c;
            c = eatAndPeekInputChar();

            if (c == '.') {     // ...
                t._string += c;
                eatInputChar();
            }
            return;
        }

        return;

    } // switch (c)

#undef SETUP_SYMBOL

numLabel:
    if (isDigit(c) || (c == '.')) {

        // A number.  Note-- single dots have been
        // parsed already, so a . indicates a number
        // less than 1 in floating point form.

        // [0-9]*(\.[0-9][f]) or [0-9]+ or 0x[0-9,A-F]+

        if (t._string != "-") {
            // If we picked up a leading "-" sign above, keep it,
            // otherwise drop the string parsed thus far
            t._string = "";
        }
        t._type = Token::NUMBER;
        if (c == '.') {
            t._extendedType = Token::FLOATING_POINT_TYPE;
        } else {
            t._extendedType = Token::INTEGER_TYPE;
        }

        if ((c == '0') && (peekInputChar(1) == 'x')) {
            // Hex number
            t._string += "0x";

            // skip the 0x
            eatInputChar();
            eatInputChar();

            c = peekInputChar();
            while (isDigit(c) || ((c >= 'A') && (c <= 'F')) || ((c >= 'a') && (c <= 'f'))) {
                t._string += c;
                c = eatAndPeekInputChar();
            }

        } else {
            // Non-hex number

            // Read the part before the decimal.
            while (isDigit(c)) {
                t._string += c;
                c = eatAndPeekInputChar();
            }

            // True if we are reading a floating-point special type
            bool isSpecial = false;

            // Read the decimal, if one exists
            if (c == '.') {
                t._extendedType = Token::FLOATING_POINT_TYPE;

                // The '.' character was a decimal point, not the start of a
                // method or range operator
                t._string += c;
                c = eatAndPeekInputChar();

                // Floating point specials (msvc format only)
                if (options.msvcFloatSpecials && (c == '#')) {
                    isSpecial = true;
                    // We are reading a floating point special value
                    // of the form -1.#IND00, -1.#INF00, or 1.#INF00
                    // (with or without the trailing 00)
                    c = eatAndPeekInputChar();
                    char test = c;
                    if (! options.caseSensitive) {
                        test = toupper(c);
                    }
                    if (test != 'I') {
                        throw BadMSVCSpecial
                            ("Incorrect floating-point special (inf or nan) "
                             "format.",
                             t.line(), charNumber);
                    }
                    c = eatAndPeekInputChar();
                    test = c;
                    if (! options.caseSensitive) {
                        test = toupper(c);
                    }
                    if (test != 'N') {
                        throw BadMSVCSpecial
                            (
                             "Incorrect floating-point special (inf or nan) "
                             "format.",
                             t.line(), charNumber);
                    }
                    t._string += "#IN";
                    c = eatAndPeekInputChar();
                    test = c;
                    if (! options.caseSensitive) {
                        test = toupper(c);
                    }
                    if ((test != 'F') && (test != 'D')) {
                        throw BadMSVCSpecial
                            (
                             "Incorrect floating-point special (inf or nan) "
                             "format.",
                             t.line(), charNumber);
                    }
                    t._string += c;

                    // Consume the final 'F'/'D'.  On older systems, there may
                    // be an extra 00 tacked on; consume exactly that suffix
                    // and nothing after it, so the character following the
                    // special stays available for the next token.
                    c = eatAndPeekInputChar();
                    if (c == '0') {
                        c = eatAndPeekInputChar();
                        if (c != '0') {
                            throw BadMSVCSpecial
                                (
                                 "Incorrect floating-point special (inf or nan) "
                                 "format.",
                                 t.line(), charNumber);
                        }
                        eatInputChar();
                        t._string += "00";
                    }

                } else {

                    // Read the part after the decimal
                    while (isDigit((char)c)) {
                        t._string += (char)c;
                        c = eatAndPeekInputChar();
                    }
                }
            }

            if (! isSpecial && ((c == 'e') || (c == 'E'))) {
                // Read exponent
                t._extendedType = Token::FLOATING_POINT_TYPE;
                t._string += c;

                c = eatAndPeekInputChar();
                if ((c == '-') || (c == '+')) {
                    t._string += c;
                    c = eatAndPeekInputChar();
                }

                while (isDigit(c)) {
                    t._string += c;
                    c = eatAndPeekInputChar();
                }
            }

            if (! isSpecial && (t._extendedType == Token::FLOATING_POINT_TYPE) && (c == 'f')) {
                // Trailing f on a float
                t._string += c;
                c = eatAndPeekInputChar();
            }
        }
        return;

    } else if (isLetter(c) || (c == '_')) {
        // Identifier or keyword
        // [A-Za-z_][A-Za-z_0-9]*

        t._type = Token::SYMBOL;
        t._extendedType = Token::SYMBOL_TYPE;
        t._string = "";
        do {
            t._string += c;
            c = eatAndPeekInputChar();
        } while (isLetter(c) || isDigit(c) || (c == '_'));

        // See if this symbol is actually a boolean
        if ((options.trueSymbols.size() > 0) || (options.falseSymbols.size() > 0)) {
            std::string str = t._string;
            if (! options.caseSensitive) {
                str = toUpper(str);
            }
            if (options.trueSymbols.contains(str)) {
                t._type = Token::BOOLEAN;
                t._extendedType = Token::BOOLEAN_TYPE;
                t._bool = true;
            } else if (options.falseSymbols.contains(str)) {
                t._type = Token::BOOLEAN;
                t._extendedType = Token::BOOLEAN_TYPE;
                t._bool = false;
            }
        }

        if (options.simpleFloatSpecials && ((t._string == "nan") || (t._string == "inf"))) {
            t._type = Token::NUMBER;
            t._extendedType = Token::FLOATING_POINT_TYPE;
        }
        return;

    } else if (c == '\"') {

        // Discard the double-quote.
        eatInputChar();

        // Double quoted string
        parseQuotedString('\"', t);
        return;

    } else if (c == options.singleQuoteCharacter) {

        // Discard the single-quote.
        eatInputChar();

        if (options.singleQuotedStrings) {
            // Single quoted string
            parseQuotedString(options.singleQuoteCharacter, t);
        } else {
            t._string = c;
            t._type = Token::SYMBOL;
            t._extendedType = Token::SYMBOL_TYPE;
        }
        return;

    } // end of special case tokens

    if ((c == EOF) || (c == '\0')) {
        t._type = Token::END;
        t._extendedType = Token::END_TYPE;
        t._string = "";
        return;
    }

    // Some unknown token
    throw format("Unrecognized token type beginning with character '%c' (ASCII %d)", c, c);
}
/**
 Reads the body of a quoted string into \a t.  The opening quote must already
 have been consumed; characters are appended to t._string until the closing
 \a delimiter (which is consumed) or the end of input.

 When options.escapeSequencesInStrings is set, backslash escapes are decoded:
 \r \n \t \0 \\ \" plus the escaped single-quote character and the escaped
 comment characters.  Any other escape sequence is dropped.
*/
void TextInput::parseQuotedString(unsigned char delimiter, Token& t) {

    t._type = Token::STRING;
    t._extendedType = (delimiter == options.singleQuoteCharacter)
        ? Token::SINGLE_QUOTED_TYPE
        : Token::DOUBLE_QUOTED_TYPE;

    // Every iteration consumes exactly one character up front; hitting the
    // end of input inside the string simply finishes the string.
    for (int ch = eatInputChar(); ch != EOF; ch = eatInputChar()) {

        const bool startsEscape = options.escapeSequencesInStrings && (ch == '\\');

        if (! startsEscape) {
            if (ch == delimiter) {
                // End of the string; the delimiter is already consumed.
                break;
            }

            // Ordinary character: it goes onto the string as-is.
            t._string += (char)ch;
            continue;
        }

        // An escaped character: consume the character after the backslash.
        const int escaped = eatInputChar();

        if (escaped == 'r') {
            t._string += '\r';
        } else if (escaped == 'n') {
            t._string += '\n';
        } else if (escaped == 't') {
            t._string += '\t';
        } else if (escaped == '0') {
            t._string += '\0';
        } else if ((escaped == '\\') || (escaped == '\"')) {
            t._string += (char)escaped;
        } else if (escaped == options.singleQuoteCharacter) {
            t._string += (char)escaped;
        } else if (((escaped == options.otherCommentCharacter) &&
                    (options.otherCommentCharacter != '\0')) ||
                   ((escaped == options.otherCommentCharacter2) &&
                    (options.otherCommentCharacter2 != '\0'))) {
            t._string += (char)escaped;
        }
        // otherwise, some illegal escape sequence; skip it.
    }
}
bool TextInput::readBoolean() {
const Token& t = read();
if (t._type == Token::BOOLEAN) {
return t.boolean();
}
// Push initial token back, and throw an error. We intentionally
// indicate that the wrong type is the type of the initial token.
// Logically, the number started there.
push(t);
throw WrongTokenType(options.sourceFileName, t.line(), t.character(),
Token::BOOLEAN, t._type);
}
int TextInput::readInteger() {
Token t;
read(t);
if (t._extendedType == Token::INTEGER_TYPE) { // common case
return int(t.number());
} else {
// Even if signedNumbers is disabled, readInteger attempts to
// read a signed number, so we handle that case here.
if (! options.signedNumbers
&& (t._type == Token::SYMBOL)
&& ((t._string == "-")
|| (t._string == "+"))) {
Token t2;
read(t2);
if ((t2._extendedType == Token::INTEGER_TYPE)
&& (t2._character == t._character + 1)) {
if (t._string == "-") {
return (int)-t2.number();
} else {
return (int)t2.number();
}
}
// push back the second token.
push(t2);
}
// Push initial token back, and throw an error. We intentionally
// indicate that the wrong type is the type of the initial token.
// Logically, the number started there.
push(t);
throw WrongTokenType(options.sourceFileName, t.line(), t.character(),
Token::NUMBER, t._type);
}
}
double TextInput::readNumber() {
Token t;
read(t);
if (t._type == Token::NUMBER) { // common case
return t.number();
}
// Even if signedNumbers is disabled, readNumber attempts to
// read a signed number, so we handle that case here.
if (! options.signedNumbers
&& (t._type == Token::SYMBOL)
&& ((t._string == "-")
|| (t._string == "+"))) {
Token t2(read());
if ((t2._type == Token::NUMBER)
&& (t2._character == t._character + 1)) {
if (t._string == "-") {
return -t2.number();
} else {
return t2.number();
}
}
// push back the second token.
push(t2);
}
// Push initial token back, and throw an error. We intentionally
// indicate that the wrong type is the type of the initial token.
// Logically, the number started there.
push(t);
throw WrongTokenType(options.sourceFileName, t.line(), t.character(),
Token::NUMBER, t._type);
}
/** Reads the next token, which must be a STRING.
    \throws WrongTokenType otherwise, after pushing the token back. */
Token TextInput::readStringToken() {
    Token tok;
    read(tok);

    if (tok._type != Token::STRING) {
        push(tok);
        throw WrongTokenType(options.sourceFileName, tok.line(), tok.character(),
                             Token::STRING, tok._type);
    }

    return tok;
}
std::string TextInput::readString() {
return readStringToken()._string;
}
void TextInput::readString(const std::string& s) {
const Token& t = readStringToken();
if (t._string == s) { // fast path
return;
}
push(t);
throw WrongString(options.sourceFileName, t.line(), t.character(),
s, t._string);
}
/** Reads the next token, which must be a COMMENT.
    \throws WrongTokenType otherwise, after pushing the token back. */
Token TextInput::readCommentToken() {
    Token tok;
    read(tok);

    if (tok._type != Token::COMMENT) {
        push(tok);
        throw WrongTokenType(options.sourceFileName, tok.line(), tok.character(),
                             Token::COMMENT, tok._type);
    }

    return tok;
}
std::string TextInput::readComment() {
return readCommentToken()._string;
}
void TextInput::readComment(const std::string& s) {
const Token& t = readCommentToken();
if (t._string == s) { // fast path
return;
}
push(t);
throw WrongString(options.sourceFileName, t.line(), t.character(),
s, t._string);
}
/** Reads the next token, which must be a NEWLINE.
    \throws WrongTokenType otherwise, after pushing the token back. */
Token TextInput::readNewlineToken() {
    Token tok;
    read(tok);

    if (tok._type != Token::NEWLINE) {
        push(tok);
        throw WrongTokenType(options.sourceFileName, tok.line(), tok.character(),
                             Token::NEWLINE, tok._type);
    }

    return tok;
}
std::string TextInput::readNewline() {
return readNewlineToken()._string;
}
void TextInput::readNewline(const std::string& s) {
const Token& t = readNewlineToken();
if (t._string == s) { // fast path
return;
}
push(t);
throw WrongString(options.sourceFileName, t.line(), t.character(),
s, t._string);
}
/** Reads the next token, which must be a SYMBOL, and returns it by value. */
Token TextInput::readSymbolToken() {
    Token result;
    readSymbolToken(result);
    return result;
}
/** Reads the next token into \a t; it must be a SYMBOL.
    \throws WrongTokenType otherwise, after pushing the token back. */
void TextInput::readSymbolToken(Token& t) {
    read(t);

    if (t._type != Token::SYMBOL) {
        push(t);
        throw WrongTokenType(options.sourceFileName, t.line(), t.character(),
                             Token::SYMBOL, t._type);
    }
}
std::string TextInput::readSymbol() {
return readSymbolToken()._string;
}
void TextInput::readSymbol(const std::string& symbol) {
Token t;
readSymbolToken(t);
if (t._string == symbol) { // fast path
return;
}
push(t);
throw WrongSymbol(options.sourceFileName, t.line(), t.character(),
symbol, t._string);
}
/**
 Loads the entire contents of \a filename into the character buffer.

 Files that FileSystem::inZipfile reports as living inside a zipfile are
 loaded through readWholeFile; all other files are read directly with
 fopen/fread.

 \param filename File to tokenize.  Also used as the source name in error
                 messages when \a opt does not provide one.
 \param opt      Tokenizer settings.
*/
TextInput::TextInput(const std::string& filename, const Settings& opt) : options(opt) {
    init();
    if (options.sourceFileName.empty()) {
        options.sourceFileName = filename;
    }

    std::string zipfile;
    if (FileSystem::inZipfile(filename, zipfile)) {
        // TODO: this could be faster if we directly read the zipfile
        const std::string& input = readWholeFile(filename);
        size_t n = input.size();
        buffer.resize(n);
        System::memcpy(buffer.getCArray(), input.c_str(), n);
    } else {
        // Read directly into the array
        const uint64 n = FileSystem::size(filename);
        alwaysAssertM(n != uint64(-1), std::string("File does not exist: ") + filename);
        buffer.resize(size_t(n));

        FILE* f = FileSystem::fopen(filename.c_str(), "rb");
        alwaysAssertM(f != NULL, std::string("Could not open file: ") + filename);
        if (f == NULL) {
            // Never hand a null stream to fread; fall back to empty input.
            buffer.resize(0);
            return;
        }

        const size_t bytesRead = fread(buffer.getCArray(), 1, size_t(n), f);
        FileSystem::fclose(f);

        if (bytesRead < size_t(n)) {
            // Short read: keep only what was actually read so the lexer
            // never tokenizes bytes that did not come from the file.
            buffer.resize(bytesRead);
        }
    }
}
/**
 Initializes this TextInput to tokenize \a len characters starting at \a str.

 When the settings carry no source file name, a quoted excerpt of the text is
 used instead: the whole text if it is shorter than 14 characters, otherwise
 its first 10 characters followed by "...".
*/
void TextInput::initFromString(const char* str, int len, const Settings& settings) {
    options = settings;
    init();

    if (options.sourceFileName.empty()) {
        const bool fitsWhole = (len < 14);
        options.sourceFileName = fitsWhole
            ? format("\"%.*s\"", len, str)
            : format("\"%.*s...\"", 10, str);
    }

    // Copy the text into the internal buffer.
    buffer.resize(len);
    System::memcpy(buffer.getCArray(), str, buffer.size());
}
/** Tokenizes the contents of \a str.  The \a fs argument is not used by the
    body. */
TextInput::TextInput(FS fs, const std::string& str, const Settings& opt) {
    (void)fs;

    const char* text   = str.c_str();
    const int   length = (int)str.size();
    initFromString(text, length, opt);
}
/** Tokenizes \a len characters starting at \a str.  The \a fs argument is not
    used by the body. */
TextInput::TextInput(FS fs, const char* str, size_t len, const Settings& opt) : options(opt) {
    (void)fs;

    const int length = (int)len;
    initFromString(str, length, opt);
}
/** Name of the source being tokenized, as stored in the settings. */
const std::string& TextInput::filename() const {
    const std::string& sourceName = options.sourceFileName;
    return sourceName;
}
///////////////////////////////////////////////////////////////////////////////////
/**
 Base constructor for tokenizer errors.

 Forwards the source name, line and character to ParseError together with a
 "<source>(<line>) : " prefix as the initial message, and records the source
 name in sourceFile.

 \param src Name of the source being tokenized.
 \param ln  Line number of the error.
 \param ch  Character number of the error.
*/
TextInput::TokenException::TokenException(
const std::string& src,
int ln,
int ch) : ParseError(src, ln, ch, format("%s(%d) : ", src.c_str(), ln)),
sourceFile(src) {
}
///////////////////////////////////////////////////////////////////////////////////
/**
 Returns a printable name for a token type, used when building
 WrongTokenType messages.

 BOOLEAN and COMMENT are included because readBoolean() and
 readCommentToken() report errors with those types.

 \return A static string such as "Token::SYMBOL", or "?" for a type that has
         no case here (which also trips a debug assertion).
*/
static const char* tokenTypeToString(Token::Type t) {
    switch (t) {
    case Token::SYMBOL:
        return "Token::SYMBOL";
    case Token::STRING:
        return "Token::STRING";
    case Token::NUMBER:
        return "Token::NUMBER";
    case Token::BOOLEAN:
        return "Token::BOOLEAN";
    case Token::COMMENT:
        return "Token::COMMENT";
    case Token::NEWLINE:
        return "Token::NEWLINE";
    case Token::END:
        return "Token::END";
    default:
        debugAssertM(false, "Fell through switch");
        return "?";
    }
}
/** Error raised when a token of type \a e was expected but one of type \a a
    was found; appends that description to the message. */
TextInput::WrongTokenType::WrongTokenType(
    const std::string& src,
    int                ln,
    int                ch,
    Token::Type        e,
    Token::Type        a) :
    TokenException(src, ln, ch), expected(e), actual(a) {

    const char* const expectedName = tokenTypeToString(e);
    const char* const actualName   = tokenTypeToString(a);

    message += format("Expected token of type %s, found type %s.",
                      expectedName, actualName);
}
/**
 Error raised for a malformed MSVC-style floating-point special.  Adds nothing
 beyond what TokenException records.

 \param src Forwarded to TokenException as the source name.
 \param ln  Line number of the error.
 \param ch  Character number of the error.
*/
TextInput::BadMSVCSpecial::BadMSVCSpecial(
const std::string& src,
int ln,
int ch) :
TokenException(src, ln, ch) {
}
/** Error raised when symbol \a e was expected but symbol \a a was found;
    appends that description to the message. */
TextInput::WrongSymbol::WrongSymbol(
    const std::string& src,
    int                ln,
    int                ch,
    const std::string& e,
    const std::string& a) :
    TokenException(src, ln, ch), expected(e), actual(a) {

    const std::string detail =
        format("Expected symbol '%s', found symbol '%s'.", e.c_str(), a.c_str());
    message += detail;
}
/** Error raised when string \a e was expected but string \a a was found;
    appends that description to the message. */
TextInput::WrongString::WrongString(
    const std::string& src,
    int                ln,
    int                ch,
    const std::string& e,
    const std::string& a) :
    TokenException(src, ln, ch), expected(e), actual(a) {

    const std::string detail =
        format("Expected string '%s', found string '%s'.", e.c_str(), a.c_str());
    message += detail;
}
void deserialize(bool& b, TextInput& ti) {
b = ti.readSymbol() == "true";
}
/** Reads a number from \a ti and stores it in \a b after iRound. */
void deserialize(int& b, TextInput& ti) {
    const double value = ti.readNumber();
    b = iRound(value);
}
/** Reads a number from \a ti, applies iRound, and stores the result in \a b
    converted to uint8. */
void deserialize(uint8& b, TextInput& ti) {
    const double value = ti.readNumber();
    b = (uint8)iRound(value);
}
/** Reads a number from \a ti into \a b. */
void deserialize(double& b, TextInput& ti) {
    const double value = ti.readNumber();
    b = value;
}
/** Reads a number from \a ti and stores it in \a b converted to float. */
void deserialize(float& b, TextInput& ti) {
    const double value = ti.readNumber();
    b = (float)value;
}
} // namespace
#ifdef _MSC_VER
# pragma warning (pop)
#endif