#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This is a utility for converting JavaScript source code into C-style
# char arrays. It is used for embedded JavaScript code in the V8
# library.

34
import os, re
35
import optparse
36
import jsmin
37
import textwrap
38 39


40 41 42
class Error(Exception):
  """Exception type used by this script to report processing failures."""

  def __init__(self, msg):
    # Forward the message so that str(error) yields it unchanged.
    super(Error, self).__init__(msg)
43 44


45
def ToCArray(byte_sequence):
  """Render a byte sequence as the body of a C array initializer.

  Args:
    byte_sequence: A string (each character is converted with ord()) or a
        bytes/bytearray-like sequence whose items are already integers.

  Returns:
    The decimal values joined by ", " and wrapped to 80 columns.
  """
  values = []
  for item in byte_sequence:
    # Iterating a bytearray yields ints rather than one-character strings,
    # and ord() rejects ints, so only convert actual characters.
    values.append(str(item if isinstance(item, int) else ord(item)))
  return textwrap.fill(", ".join(values), 80)
51 52


53 54
def RemoveCommentsEmptyLinesAndWhitespace(lines):
  """Strip comments, blank lines and line-edge whitespace from source text.

  The work is done with plain regular expressions, so comment markers that
  appear inside string literals are treated as comments too.

  Args:
    lines: The source text as a single string.

  Returns:
    The stripped text.
  """
  # The passes run in this order; each one sees the previous pass's output.
  passes = (
      (re.compile(r'\n+'), '\n'),                 # empty lines
      (re.compile(r'//.*\n'), '\n'),              # end-of-line comments
      (re.compile(r'/\*.*?\*/', re.DOTALL), ''),  # comments.
      (re.compile(r'\s+\n'), '\n'),               # trailing whitespace
      (re.compile(r'\n\s+'), '\n'),               # initial whitespace
  )
  for pattern, replacement in passes:
    lines = pattern.sub(replacement, lines)
  return lines


62 63 64 65 66 67 68 69 70
def ReadFile(filename):
  """Return the whole contents of the text file `filename` as one string."""
  with open(filename, "rt") as source_file:
    return source_file.read()


71 72
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')


def Validate(lines):
  """Reject constructs that are not permitted in the natives sources.

  Args:
    lines: The source text as a single string.

  Returns:
    `lines`, unchanged.

  Raises:
    Error: If the text contains a call to eval, a with statement, a
        Make...Error call whose first argument still looks like an
        unexpanded template name, or a `new $...Error(` with arguments.
  """
  # Because of simplified context setup, eval and with is not
  # allowed in the natives files.
  forbidden = (
      (EVAL_PATTERN, "Eval disallowed in natives."),
      (WITH_PATTERN, "With statements disallowed in natives."),
  )
  for pattern, message in forbidden:
    if pattern.search(lines) is not None:
      raise Error(message)

  unknown_template = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
  if unknown_template is not None:
    raise Error("Unknown error message template '%s'" % unknown_template.group(1))

  if NEW_ERROR_PATTERN.search(lines) is not None:
    raise Error("Error constructed without message template.")

  # Pass lines through unchanged.
  return lines
91 92


93
def ExpandConstants(lines, constants):
  """Replace every occurrence of each constant with its value.

  Args:
    lines: The source text as a single string.
    constants: Iterable of (compiled_pattern, value) pairs; each value is
        converted with str() before being inserted.

  Returns:
    The text after all substitutions, applied in the order given.
  """
  for key, value in constants:
    replacement = str(value)
    # A callable replacement inserts the text literally. Passing the string
    # itself would make re interpret backslashes in the value as escapes or
    # group references.
    lines = key.sub(lambda match, text=replacement: text, lines)
  return lines

98

99 100 101 102 103 104 105 106 107 108 109 110 111
def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
  """Expand every invocation of one macro found at or after `pos`.

  Arguments are split on commas that are not nested inside (), {} or [].
  The scan is purely character based; it does not recognise string
  literals.

  Args:
    lines: The source text as a single string.
    pos: Offset in `lines` at which to start searching.
    name_pattern: Compiled pattern matching the macro name followed by the
        opening '('.
    macro: Object with an `args` list and an `expand(mapping)` method.
    expander: Callable applied to each stripped argument text before it is
        bound to its parameter name.

  Returns:
    The text with each invocation replaced by `macro.expand(...)`.

  Raises:
    Error: If an invocation supplies more arguments than `macro.args` has,
        or fewer than `len(macro.args) - 1`.
  """
  pattern_match = name_pattern.search(lines, pos)
  while pattern_match is not None:
    start = pattern_match.start()
    end = pattern_match.end()
    assert lines[end - 1] == '('
    expanded_args = []

    def add_arg(text):
      if len(expanded_args) >= len(macro.args):
        # The rest of this file treats '\n' as the line separator, so count
        # that rather than os.linesep (which is '\r\n' on Windows).
        lineno = lines.count('\n', 0, start) + 1
        raise Error('line %s: Too many arguments for macro "%s"' % (lineno, name_pattern.pattern))
      # Remember to expand recursively in the arguments
      expanded_args.append(expander(text.strip()))

    # Scan over the arguments
    height = 1
    last_match = end
    while end < len(lines) and height > 0:
      char = lines[end]
      # We don't count commas at higher nesting levels.
      if char == ',' and height == 1:
        add_arg(lines[last_match:end])
        last_match = end + 1
      elif char in ('(', '{', '['):
        height += 1
      elif char in (')', '}', ']'):
        height -= 1
      end += 1
    # Remember to add the last match.
    add_arg(lines[last_match:end - 1])
    if len(expanded_args) < len(macro.args) - 1:
      lineno = lines.count('\n', 0, start) + 1
      raise Error('line %s: Too few arguments for macro "%s"' % (lineno, name_pattern.pattern))
    # Bind parameter names to the arguments in declaration order.
    mapping = dict(zip(macro.args, expanded_args))
    result = macro.expand(mapping)
    # Replace the occurrence of the macro with the expansion
    lines = lines[:start] + result + lines[end:]
    # Resume after the inserted text so the expansion is not rescanned.
    pattern_match = name_pattern.search(lines, start + len(result))
  return lines

139
def ExpandMacros(lines, macros):
  """Expand all invocations of the given macros in `lines`.

  Args:
    lines: The source text as a single string.
    macros: Sequence of (name_pattern, macro) pairs.

  Returns:
    The text with the macros expanded.
  """
  def expand_arguments(text):
    # Macro arguments may themselves contain macro invocations.
    return ExpandMacros(text, macros)

  # We allow macros to depend on the previously declared macros, but
  # we don't allow self-dependencies or recursion.
  for name_pattern, macro in reversed(macros):
    lines = ExpandMacroDefinition(lines, 0, name_pattern, macro,
                                  expand_arguments)
  return lines

class TextMacro:
  """A textual macro: a body in which parameter names are substituted."""

  def __init__(self, args, body):
    # args: list of parameter names; body: the replacement text.
    self.args = args
    self.body = body

  def expand(self, mapping):
    """Return the body with each key of `mapping` replaced by its value.

    Matching is purely textual: a key is replaced wherever it occurs in the
    body, including inside longer identifiers.

    Args:
      mapping: Dictionary from parameter name to replacement text.

    Returns:
      The expanded body.
    """
    # An empty key (a macro declared with an empty parameter list yields
    # one) would match between every character of the body, so skip it.
    # Longer keys go first so a key that is a prefix of another key cannot
    # shadow it in the alternation.
    keys = sorted((key for key in mapping if key), key=len, reverse=True)
    if not keys:
      return self.body
    # Keys could be substrings of earlier values. To avoid unintended
    # clobbering, apply all replacements simultaneously.
    any_key_pattern = "|".join(re.escape(key) for key in keys)
    def replace(match):
      return mapping[match.group(0)]
    return re.sub(any_key_pattern, replace, self.body)
159

160
CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
  """Parse the contents of a macro definition file.

  Each non-empty line, after removing anything from '#' onwards, must be
  either `define NAME = VALUE;` or `macro NAME(ARGS) = BODY;`.

  Args:
    lines: The file contents as a single string.

  Returns:
    A (constants, macros) tuple. `constants` is a list of
    (compiled_pattern, value_text) pairs and `macros` is a list of
    (compiled_pattern, TextMacro) pairs, both in file order.

  Raises:
    Error: If a line is neither a constant nor a macro definition.
  """
  constants = []
  macros = []
  for line in lines.split('\n'):
    # Everything from '#' to the end of the line is a comment.
    line = line.partition('#')[0].strip()
    if not line:
      continue
    const_match = CONST_PATTERN.match(line)
    if const_match:
      name = const_match.group(1)
      value = const_match.group(2).strip()
      constants.append((re.compile("\\b%s\\b" % name), value))
      continue
    macro_match = MACRO_PATTERN.match(line)
    if macro_match is None:
      raise Error("Illegal line: " + line)
    name = macro_match.group(1)
    args = [arg.strip() for arg in macro_match.group(2).split(',')]
    body = macro_match.group(3).strip()
    macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
  return (constants, macros)

188

binji's avatar
binji committed
189
TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')

def ReadMessageTemplates(lines):
  """Collect message template names and assign them consecutive indices.

  A template is any line matching TEMPLATE_PATTERN, i.e. leading whitespace
  followed by `T(Name,`.

  Args:
    lines: The message template file contents as a single string.

  Returns:
    A list of (compiled_pattern, index) pairs. The pattern matches the word
    `k<Name>` and the index counts matching lines from 0 in file order.
  """
  templates = []
  for line in lines.split('\n'):
    template_match = TEMPLATE_PATTERN.match(line)
    if template_match is None:
      continue
    name = "k%s" % template_match.group(1)
    # The next free index is the number of templates seen so far.
    value = len(templates)
    templates.append((re.compile("\\b%s\\b" % name), value))
  return templates

203 204 205
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')

def ExpandInlineMacros(lines):
  """Expand macros that are defined inline in the source text.

  An inline macro starts with a `macro NAME(ARGS)` line and runs up to the
  next `endmacro` line. The definition is removed from the text and
  invocations following it are expanded.

  Args:
    lines: The source text as a single string.

  Returns:
    The text with inline macro definitions removed and expanded.

  Raises:
    Error: If a macro definition has no matching `endmacro`.
  """
  def keep_argument(text):
    # Arguments of inline macros are inserted as written.
    return text

  pos = 0
  macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
  while macro_match is not None:
    name = macro_match.group(1)
    args = [arg.strip() for arg in macro_match.group(2).split(',')]
    end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end())
    if end_macro_match is None:
      raise Error("Macro %s unclosed" % name)
    definition_start = macro_match.start()
    body = lines[macro_match.end():end_macro_match.start()]

    # remove macro definition
    lines = lines[:definition_start] + lines[end_macro_match.end():]

    # advance position to where the macro definition was
    pos = definition_start
    lines = ExpandMacroDefinition(lines, pos,
                                  re.compile("\\b%s\\(" % name),
                                  TextMacro(args, body),
                                  keep_argument)
    macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
  # no more macros
  return lines
231

232

233
INLINE_CONSTANT_PATTERN = re.compile(r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')

def ExpandInlineConstants(lines):
  """Expand constants that are defined inline in the source text.

  Each `define NAME = VALUE;` line is removed, and every occurrence of the
  word NAME after the definition is replaced by VALUE.

  Args:
    lines: The source text as a single string.

  Returns:
    The text with inline constant definitions removed and expanded.
  """
  pos = 0
  while True:
    const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
    if const_match is None:
      # no more constants
      return lines
    name = const_match.group(1)
    replacement = const_match.group(2)
    name_pattern = re.compile("\\b%s\\b" % name)

    # advance position to where the constant definition was
    pos = const_match.start()

    # remove constant definition and replace. A callable replacement
    # inserts the value literally; a plain string would have its
    # backslashes interpreted by re as escapes or group references.
    lines = (lines[:pos] +
             name_pattern.sub(lambda match: replacement,
                              lines[const_match.end():]))


254
# Skeleton of the generated C++ file. It is filled in with the %-formatting
# dictionary returned by BuildMetadata.
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""

# Declaration of the byte array; %s receives the output of ToCArray.
SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


# One line of GetIndex per source: maps the source id to its index.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One line of GetScriptSource per source: the slice of `sources` it occupies.
GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


# One line of GetScriptName per source: its name and the name's length.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
"""

325

326
def BuildFilterChain(macro_filename, message_template_file):
  """Build the chain of filter functions to be applied to the sources.

  Args:
    macro_filename: Name of the macro file, if any.
    message_template_file: Name of the message template file, if any.

  Returns:
    A function (string -> string) that processes a source file.
  """
  filter_chain = []

  if macro_filename:
    (consts, macros) = ReadMacros(ReadFile(macro_filename))
    filter_chain.append(lambda l: ExpandMacros(l, macros))
    filter_chain.append(lambda l: ExpandConstants(l, consts))

  if message_template_file:
    message_templates = ReadMessageTemplates(ReadFile(message_template_file))
    filter_chain.append(lambda l: ExpandConstants(l, message_templates))

  filter_chain.extend([
    RemoveCommentsEmptyLinesAndWhitespace,
    ExpandInlineMacros,
    ExpandInlineConstants,
    Validate,
    jsmin.JavaScriptMinifier().JSMinify
  ])

  def apply_filters(source):
    # Feed the output of each filter into the next one, in list order.
    # An explicit loop avoids relying on reduce() being a builtin.
    for source_filter in filter_chain:
      source = source_filter(source)
    return source

  return apply_filters

359
def BuildExtraFilterChain():
  """Return the filter function used for "extras" sources.

  Returns:
    A function (string -> string) that validates the text and then strips
    comments, empty lines and line-edge whitespace.
  """
  def extra_filter(source):
    validated = Validate(source)
    return RemoveCommentsEmptyLinesAndWhitespace(validated)
  return extra_filter
361 362 363 364 365 366 367 368 369

class Sources:
  """Parallel lists describing the prepared source files."""

  def __init__(self):
    # Entry i of each list refers to the same source file.
    self.names, self.modules, self.is_debugger_id = [], [], []


def IsDebuggerFile(filename):
  """Return True if the directory containing `filename` is named "debug"."""
  parent_directory = os.path.dirname(filename)
  return os.path.split(parent_directory)[1] == "debug"
371 372 373 374

def IsMacroFile(filename):
  """Return True if `filename` ends with "macros.py"."""
  macro_suffix = "macros.py"
  return filename.endswith(macro_suffix)

375 376 377
def IsMessageTemplateFile(filename):
  """Return True if `filename` ends with "messages.h"."""
  template_suffix = "messages.h"
  return filename.endswith(template_suffix)

378

379
def PrepareSources(source_files, native_type, emit_js):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of JavaScript-ish source files. A file named macros.py
        will be treated as a list of macros, and a file named messages.h as
        the list of message templates. The list itself is not modified.
    native_type: String corresponding to a NativeType enum value, allowing us
        to treat different types of sources differently.
    emit_js: True if we should skip the byte conversion and just leave the
        sources as JS strings.

  Returns:
    An instance of Sources.

  Raises:
    Error: If a source file fails processing; the message names the file.
  """
  # Work on a copy so the caller's list is left untouched.
  source_files = list(source_files)

  # List comprehensions (rather than filter()) so the results always
  # support len() and indexing.
  macro_file = None
  macro_files = [f for f in source_files if IsMacroFile(f)]
  assert len(macro_files) in [0, 1]
  if macro_files:
    source_files.remove(macro_files[0])
    macro_file = macro_files[0]

  message_template_file = None
  message_template_files = [f for f in source_files
                            if IsMessageTemplateFile(f)]
  assert len(message_template_files) in [0, 1]
  if message_template_files:
    source_files.remove(message_template_files[0])
    message_template_file = message_template_files[0]

  filters = None
  if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
    filters = BuildExtraFilterChain()
  else:
    filters = BuildFilterChain(macro_file, message_template_file)

  # Sort 'debugger' sources first. The sort is stable, so the relative
  # order within each group is preserved (False sorts before True).
  source_files = sorted(source_files,
                        key=lambda f: not IsDebuggerFile(f))

  source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

  # Have a single not-quite-empty source file if there are none present;
  # otherwise you get errors trying to compile an empty C++ array.
  # It cannot be empty (or whitespace, which gets trimmed to empty), as
  # the deserialization code assumes each file is nonempty.
  if not source_files_and_contents:
    source_files_and_contents = [("dummy.js", "(function() {})")]

  result = Sources()

  for (source, contents) in source_files_and_contents:
    try:
      lines = filters(contents)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)

    is_debugger = IsDebuggerFile(source)
    result.is_debugger_id.append(is_debugger)

    # Drop the last three characters of the base name (the ".js" suffix).
    name = os.path.basename(source)[:-3]
    result.names.append(name)

  return result


445
def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only. all() gives the same answer
  # whether or not filter() returns a list, unlike `not filter(...)`.
  assert all(ord(char) < 128 for char in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "source_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  assert offset == len(raw_sources)

  metadata = {
    "builtin_count": len(sources.modules),
    "debugger_count": sum(sources.is_debugger_id),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_script_source_cases": "".join(get_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata


497
def PutInt(blob_file, value):
  """Write `value` to `blob_file` as a 1-4 byte variable-length integer.

  The value is shifted left by two bits and the low two bits store the
  number of bytes minus one. The result is written least significant byte
  first.

  Args:
    blob_file: Binary file-like object with a write() method.
    value: Integer in the range [0, 2**28).
  """
  assert(value >= 0 and value < (1 << 28))
  if (value < 1 << 6):
    size = 1
  elif (value < 1 << 14):
    size = 2
  elif (value < 1 << 22):
    size = 3
  else:
    size = 4
  value_with_length = (value << 2) | (size - 1)

  byte_sequence = bytearray()
  for _ in range(size):
    byte_sequence.append(value_with_length & 255)
    value_with_length >>= 8
  blob_file.write(byte_sequence)


def PutStr(blob_file, value):
  """Write `value` to `blob_file` as a length-prefixed byte string.

  Args:
    blob_file: Binary file-like object with a write() method.
    value: The string to write. Text strings are encoded as ASCII first.
  """
  if isinstance(value, type(u"")):
    # A binary stream needs bytes, and the length prefix must count bytes.
    value = value.encode("ascii")
  PutInt(blob_file, len(value))
  blob_file.write(value)


def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob holds two groups, each written as a count followed by that many
  (name, source) string pairs: first the leading entries flagged in
  `sources.is_debugger_id`, then all remaining entries.

  Args:
    sources: A Sources instance with the prepared sources.
    startup_blob: Name of file to write the blob to.
  """
  debug_sources = sum(sources.is_debugger_id)
  # The with-block closes the file even if writing fails part-way.
  with open(startup_blob, "wb") as output:
    PutInt(output, debug_sources)
    for i in range(debug_sources):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])

    PutInt(output, len(sources.names) - debug_sources)
    for i in range(debug_sources, len(sources.names)):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])


545 546
def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
  """Process the source files and write the requested outputs.

  Args:
    sources: List of source file names (see PrepareSources).
    target: Name of the main output file.
    native_type: The parameter for the NativesCollection template.
    raw_file: If set, name of a file that receives the concatenated
        processed sources.
    startup_blob: If set, name of a file that receives the startup blob.
    emit_js: If true, `target` receives the concatenated processed sources
        instead of the generated C++ file.
  """
  prepared_sources = PrepareSources(sources, native_type, emit_js)
  sources_output = "".join(prepared_sources.modules)
  metadata = BuildMetadata(prepared_sources, sources_output, native_type)

  # Optionally emit raw file.
  if raw_file:
    with open(raw_file, "w") as output:
      output.write(sources_output)

  if startup_blob:
    WriteStartupBlob(prepared_sources, startup_blob)

  # Emit resulting source file. The with-block closes the file even if
  # formatting or writing fails.
  with open(target, "w") as output:
    if emit_js:
      output.write(sources_output)
    else:
      output.write(HEADER_TEMPLATE % metadata)

567

568
def main():
  """Parse the command line and run JS2C."""
  parser = optparse.OptionParser()
  parser.add_option("--raw",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob",
                    help="file to write the startup blob to.")
  parser.add_option("--js",
                    help="writes a JS file output instead of a C file",
                    action="store_true", default=False, dest='js')
  parser.add_option("--nojs", action="store_false", default=False, dest='js')
  parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources or macros.py.""")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Report a usage error instead of failing with an IndexError below.
    parser.error("expected an output file and a type argument")
  JS2C(args[2:],
       args[0],
       args[1],
       options.raw,
       options.startup_blob,
       options.js)
589

590 591 592

# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
  main()