3 # Copyright 2014, 2015 Piotr Dabkowski
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the 'Software'),
7 # to deal in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 # the Software, and to permit persons to whom the Software is furnished to do so, subject
10 # to the following conditions:
12 # The above copyright notice and this permission notice shall be included in all copies or
13 # substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
16 # LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
19 # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
20 from __future__ import unicode_literals
24 from collections import defaultdict
# True when the running interpreter is Python 3 or newer.
PY3 = sys.version_info[0] >= 3
42 'RegularExpression': 9,
# Reverse lookup of ``token``: maps each value of ``token`` back to its key.
TokenName = {code: name for name, code in token.items()}
# Ordered list of punctuators and keywords, assembled from three groups.
# NOTE(review): by its name this looks like the set of tokens after which a
# function expression may appear -- confirm against the tokenizer that uses it.
FnExprTokens = (
    # brackets and keywords
    ['(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
     'return', 'case', 'delete', 'throw', 'void']
    # assignment operators
    + ['=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=',
       '&=', '|=', '^=', ',']
    # binary/unary operators
    + ['+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
       '|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
       '<=', '<', '>', '!=', '!==']
)
59 syntax= set(('AssignmentExpression',
63 'ArrowFunctionExpression',
72 'ConditionalExpression',
77 'ExportAllDeclaration',
78 'ExportDefaultDeclaration',
79 'ExportNamedDeclaration',
81 'ExpressionStatement',
84 'FunctionDeclaration',
89 'ImportDefaultSpecifier',
90 'ImportNamespaceSpecifier',
104 'SequenceExpression',
109 'TaggedTemplateExpression',
117 'VariableDeclaration',
118 'VariableDeclarator',
123 # Error messages should be identical to V8.
125 'UnexpectedToken': 'Unexpected token %s',
126 'UnexpectedNumber': 'Unexpected number',
127 'UnexpectedString': 'Unexpected string',
128 'UnexpectedIdentifier': 'Unexpected identifier',
129 'UnexpectedReserved': 'Unexpected reserved word',
130 'UnexpectedTemplate': 'Unexpected quasi %s',
131 'UnexpectedEOS': 'Unexpected end of input',
132 'NewlineAfterThrow': 'Illegal newline after throw',
133 'InvalidRegExp': 'Invalid regular expression',
134 'UnterminatedRegExp': 'Invalid regular expression: missing /',
135 'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
136 'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
137 'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
138 'NoCatchOrFinally': 'Missing catch or finally after try',
139 'UnknownLabel': 'Undefined label \'%s\'',
140 'Redeclaration': '%s \'%s\' has already been declared',
141 'IllegalContinue': 'Illegal continue statement',
142 'IllegalBreak': 'Illegal break statement',
143 'IllegalReturn': 'Illegal return statement',
144 'StrictModeWith': 'Strict mode code may not include a with statement',
145 'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
146 'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
147 'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
148 'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
149 'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
150 'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
151 'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
152 'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
153 'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
154 'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
155 'StrictReservedWord': 'Use of future reserved word in strict mode',
156 'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
157 'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
158 'DefaultRestParameter': 'Unexpected token =',
159 'ObjectPatternAsRestParameter': 'Unexpected token {',
160 'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
161 'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
162 'DuplicateConstructor': 'A class may only have one constructor',
163 'StaticPrototype': 'Classes may not have static property named prototype',
164 'MissingFromClause': 'Unexpected token',
165 'NoAsAfterImportNamespace': 'Unexpected token',
166 'InvalidModuleSpecifier': 'Unexpected token',
167 'IllegalImportDeclaration': 'Unexpected token',
168 'IllegalExportDeclaration': 'Unexpected token'}
170 PRECEDENCE = {'||':1,
# String tag whose value equals its own name.
# NOTE(review): presumably marks a provisional arrow-function parameter list
# in the parser -- confirm against the code that consumes it.
ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'
200 for k,v in token.items():
204 setattr(Syntax, e, e)
# Expose every entry of the ``messages`` mapping as an attribute of
# ``Messages`` (attribute name = key, attribute value = message template).
for k, v in messages.items():
    setattr(Messages, k, v)
209 #http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
# Bucket every code point from 0 to sys.maxunicode by its Unicode general
# category, e.g. U_CATEGORIES['Lu'] is the list of all uppercase letters.
U_CATEGORIES = defaultdict(list)
for c in map(unichr, range(sys.maxunicode + 1)):
    U_CATEGORIES[unicodedata.category(c)].append(c)

# Character classes derived from the category buckets.
UNICODE_LETTER = set().union(
    *(U_CATEGORIES[cat] for cat in ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')))
UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn']).union(U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])

# Characters allowed at the start of an identifier: any letter plus '$', '_'
# and the backslash (which introduces a unicode escape sequence).
IDENTIFIER_START = UNICODE_LETTER | {'$', '_', '\\'}

# Characters allowed after the first one: everything in IDENTIFIER_START plus
# combining marks, digits, connector punctuation and ZWJ / ZWNJ.
IDENTIFIER_PART = (IDENTIFIER_START
                   | UNICODE_COMBINING_MARK
                   | UNICODE_DIGIT
                   | UNICODE_CONNECTOR_PUNCTUATION
                   | {ZWJ, ZWNJ})
236 WHITE_SPACE = set((0x20, 0x09, 0x0B, 0x0C, 0xA0, 0x1680,
237 0x180E, 0x2000, 0x2001, 0x2002, 0x2003,
238 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
239 0x2009, 0x200A, 0x202F, 0x205F, 0x3000,
# Code points treated as line terminators:
# LF, CR, LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029).
LINE_TERMINATORS = {0x0A, 0x0D, 0x2028, 0x2029}
def isIdentifierStart(ch):
    """Return True if ``ch`` is a member of ``IDENTIFIER_START``.

    ``ch`` may be a unicode string, which is looked up as-is, or an integer
    code point, which is first converted to a character with ``unichr``.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_START
def isIdentifierPart(ch):
    """Return True if ``ch`` is a member of ``IDENTIFIER_PART``.

    ``ch`` may be a unicode string, which is looked up as-is, or an integer
    code point, which is first converted to a character with ``unichr``.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_PART
def isWhiteSpace(ch):
    """Return True if ``ch`` is one of the code points in ``WHITE_SPACE``.

    ``ch`` may be an integer code point, which is looked up as-is, or a
    unicode character, which is first converted to its code point with
    ``ord``.
    """
    if isinstance(ch, unicode):
        code = ord(ch)
    else:
        code = ch
    return code in WHITE_SPACE
def isLineTerminator(ch):
    """Return True if ``ch`` is one of the code points in ``LINE_TERMINATORS``.

    ``ch`` may be an integer code point, which is looked up as-is, or a
    unicode character, which is first converted to its code point with
    ``ord``.
    """
    if isinstance(ch, unicode):
        code = ord(ch)
    else:
        code = ch
    return code in LINE_TERMINATORS
# Digit characters accepted in each numeric base.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = set('0123456789' 'abcdef' 'ABCDEF')
# Numeric value of each lowercase hexadecimal digit character ('0' -> 0 ... 'f' -> 15).
HEX_CONV = {digit: value for value, digit in enumerate('0123456789abcdef')}
260 for i,e in enumerate('ABCDEF', 10):
def isDecimalDigit(ch):
    """Return True if ``ch`` is a member of ``DEC`` (the characters '0'-'9').

    ``ch`` may be a unicode string, which is looked up as-is, or an integer
    code point, which is first converted to a character with ``unichr``.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC
268 return (ch if isinstance(ch, unicode) else unichr(ch)) in HEX
def isOctalDigit(ch):
    """Return True if ``ch`` is a member of ``OCTAL`` (the characters '0'-'7').

    ``ch`` may be a unicode string, which is looked up as-is, or an integer
    code point, which is first converted to a character with ``unichr``.
    """
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL
def isFutureReservedWord(w):
    """Return True if ``w`` is 'enum', 'export', 'import' or 'super'."""
    future_reserved = ('enum', 'export', 'import', 'super')
    return w in future_reserved
# Words checked by isStrictModeReservedWord.
RESERVED_WORD = {
    'implements', 'interface', 'package', 'private', 'protected',
    'public', 'static', 'yield', 'let',
}


def isStrictModeReservedWord(w):
    """Return True if ``w`` is a member of ``RESERVED_WORD``."""
    is_reserved = w in RESERVED_WORD
    return is_reserved
def isRestrictedWord(w):
    """Return True if ``w`` is 'eval' or 'arguments'."""
    restricted = ('eval', 'arguments')
    return w in restricted
# Keyword set, listed by increasing word length; 'pyimport' is the last entry.
KEYWORDS = {
    # two letters
    'if', 'in', 'do',
    # three letters
    'var', 'for', 'new', 'try', 'let',
    # four letters
    'this', 'else', 'case', 'void', 'with', 'enum',
    # five letters
    'while', 'break', 'catch', 'throw', 'const', 'yield', 'class', 'super',
    # six letters
    'return', 'typeof', 'delete', 'switch', 'export', 'import',
    # seven letters
    'default', 'finally', 'extends',
    # eight letters and longer
    'function', 'continue', 'debugger', 'instanceof', 'pyimport',
}
290 # 'const' is specialized as Keyword in V8.
291 # 'yield' and 'let' are for compatibility with SpiderMonkey and ES.next.
292 # Some others are from future reserved words.
class JsSyntaxError(Exception):
    """Plain ``Exception`` subclass that adds no behaviour of its own.

    NOTE(review): by its name this is the error type for JavaScript syntax
    errors -- confirm against the code that raises it.
    """
if __name__ == '__main__':
    # Minimal self-checks, executed only when the module is run as a script.
    # Each predicate is exercised with a character and/or an integer code point.
    assert isLineTerminator('\n')
    assert isLineTerminator(0x0A)
    assert isIdentifierStart('$')
    assert isIdentifierStart(100)  # 100 is the code point of 'd'
    assert isWhiteSpace(' ')