js2c.py 18.6 KB
Newer Older
1
#!/usr/bin/env python
2
#
3
# Copyright 2012 the V8 project authors. All rights reserved.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This is a utility for converting JavaScript source code into C-style
# char arrays. It is used for embedded JavaScript code in the V8
# library.

34
import os, re
35
import optparse
36
import jsmin
37
import textwrap
38 39


40 41 42
class Error(Exception):
  """Exception raised by this script when an input cannot be processed."""

  def __init__(self, msg):
    super(Error, self).__init__(msg)
43 44


45
def ToCArray(byte_sequence):
  """Format a sequence of bytes as the body of a C array initializer.

  Args:
    byte_sequence: A string, or a bytes/bytearray object. Iterating it may
        yield one-character strings or integers; both are accepted.

  Returns:
    The decimal value of every element, separated by ", " and wrapped so
    that no line is longer than 80 columns. Empty input gives "".
  """
  values = []
  for item in byte_sequence:
    # bytes (Python 3) and bytearray yield ints; strings yield characters.
    values.append(str(item if isinstance(item, int) else ord(item)))
  return textwrap.fill(", ".join(values), 80)
51 52


53 54 55 56 57 58 59
def RemoveCommentsAndTrailingWhitespace(lines):
  """Strip comments and trailing whitespace from a source string.

  The processing is purely textual: `//` up to the end of the line and
  `/* ... */` spans are removed wherever they occur.

  Args:
    lines: The complete source text as one string.

  Returns:
    The text without comments, with whitespace runs that end in newlines
    collapsed to a single newline.
  """
  block_comment = re.compile(r'/\*.*?\*/', re.DOTALL)
  # End-of-line comments go first; the newline itself is kept.
  without_line_comments = re.sub(r'//.*\n', '\n', lines)
  without_comments = block_comment.sub('', without_line_comments)
  # Trailing whitespace (including blank lines) becomes one newline.
  return re.sub(r'\s+\n+', '\n', without_comments)


60 61 62 63 64 65 66 67 68
def ReadFile(filename):
  """Return the whole contents of `filename`, read in text mode."""
  with open(filename, "rt") as source:
    return source.read()


69 70
# Match the word `eval` / `with` followed by optional whitespace and an
# opening parenthesis. Validate() rejects sources in which either is found.
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
71 72 73
# Matches a `Make...Error(` call (names continuing with "Generic" right after
# "Make" are skipped) whose first argument starts with 'k' or an uppercase
# letter; group 1 captures that argument's identifier.
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
# Matches `new $...Error(` that is not immediately followed by ')'.
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')
74

75
def Validate(lines):
  """Check a natives source for constructs that are not allowed.

  Args:
    lines: The source text to check.

  Returns:
    `lines`, unchanged, so the function can be used as a filter step.

  Raises:
    Error: if the text matches EVAL_PATTERN, WITH_PATTERN,
        INVALID_ERROR_MESSAGE_PATTERN or NEW_ERROR_PATTERN.
  """
  # Because of simplified context setup, eval and with are not
  # allowed in the natives files.
  if EVAL_PATTERN.search(lines):
    raise Error("Eval disallowed in natives.")
  if WITH_PATTERN.search(lines):
    raise Error("With statements disallowed in natives.")

  invalid_error = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
  if invalid_error:
    raise Error("Unknown error message template '%s'" % invalid_error.group(1))

  if NEW_ERROR_PATTERN.search(lines):
    raise Error("Error constructed without message template.")

  # Pass lines through unchanged.
  return lines
89 90


91
def ExpandConstants(lines, constants):
  """Replace every occurrence of each constant with its value.

  Args:
    lines: The source text to process.
    constants: Iterable of (compiled_pattern, value) pairs. Each value is
        converted with str() before it is inserted.

  Returns:
    The text with all matches of every pattern replaced, applied in the
    order the pairs are given.
  """
  for key, value in constants:
    # A callable replacement inserts the text verbatim; a plain string
    # would have backslash sequences such as "\n" or "\1" interpreted by re.
    lines = key.sub(lambda _match, text=str(value): text, lines)
  return lines

96

97 98 99 100 101 102 103 104 105 106 107 108 109
def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
  """Expand every call of one macro found in `lines` at or after `pos`.

  Args:
    lines: The source text to process.
    pos: Index at which the search for macro calls starts.
    name_pattern: Compiled regex matching the macro name followed by '(';
        every match must end on that opening parenthesis.
    macro: Object with an `args` list of parameter names and an
        `expand(mapping)` method returning the replacement text.
    expander: Function applied to each stripped argument before it is
        bound to its parameter name.

  Returns:
    The text with each call replaced by the macro's expansion.

  Raises:
    Error: if a call supplies more arguments than the macro declares.
  """
  pattern_match = name_pattern.search(lines, pos)
  while pattern_match is not None:
    # Scan over the arguments
    height = 1
    start = pattern_match.start()
    end = pattern_match.end()
    assert lines[end - 1] == '('
    last_match = end
    arg_index = [0]  # Wrap state into array, to work around Python "scoping"
    mapping = {}

    def add_arg(text):
      # Remember to expand recursively in the arguments
      if arg_index[0] >= len(macro.args):
        # The text uses '\n' line ends regardless of platform, so count
        # those rather than os.linesep.
        lineno = lines.count('\n', 0, start) + 1
        raise Error('line %s: Too many arguments for macro "%s"' % (lineno, name_pattern.pattern))
      replacement = expander(text.strip())
      mapping[macro.args[arg_index[0]]] = replacement
      arg_index[0] += 1

    while end < len(lines) and height > 0:
      # We don't count commas at higher nesting levels.
      if lines[end] == ',' and height == 1:
        add_arg(lines[last_match:end])
        last_match = end + 1
      elif lines[end] in ['(', '{', '[']:
        height = height + 1
      elif lines[end] in [')', '}', ']']:
        height = height - 1
      end = end + 1
    # Remember to add the last match.
    add_arg(lines[last_match:end-1])
    result = macro.expand(mapping)
    # Replace the occurrence of the macro with the expansion
    lines = lines[:start] + result + lines[end:]
    # Continue after the inserted text so the expansion is not rescanned.
    pattern_match = name_pattern.search(lines, start + len(result))
  return lines

134
def ExpandMacros(lines, macros):
  """Expand all macro calls in `lines`.

  Args:
    lines: The source text to process.
    macros: Sequence of (name_pattern, macro) pairs; it is processed in
        reverse order.

  Returns:
    The text with the calls of every macro expanded. Macro arguments are
    themselves expanded with the full macro list before substitution.
  """
  def expander(s):
    return ExpandMacros(s, macros)

  # We allow macros to depend on the previously declared macros, but
  # we don't allow self-dependencies or recursion.
  for name_pattern, macro in reversed(macros):
    lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expander)
  return lines

class TextMacro:
  """Macro whose body is text in which parameter names are substituted."""

  def __init__(self, args, body):
    self.args = args
    self.body = body

  def expand(self, mapping):
    """Return the body with every key of `mapping` replaced by its value.

    All keys are substituted in a single pass over the body, so text
    inserted for one parameter is never rewritten because it happens to
    contain the name of another parameter. Empty keys are ignored.

    Args:
      mapping: Dict from parameter name to replacement string.

    Returns:
      The expanded body.
    """
    # Longest keys first, so a key that is a prefix of another cannot win.
    keys = sorted((key for key in mapping if key), key=len, reverse=True)
    if not keys:
      return self.body
    any_key = re.compile("|".join(re.escape(key) for key in keys))
    # A callable replacement inserts the value verbatim.
    return any_key.sub(lambda match: mapping[match.group(0)], self.body)

class PythonMacro:
  """Macro whose expansion is computed by calling a Python function."""

  def __init__(self, args, fun):
    self.fun = fun
    self.args = args

  def expand(self, mapping):
    """Call the function with the mapped arguments and stringify the result.

    Args:
      mapping: Dict from parameter name to argument value; it must contain
          every name in `self.args`.

    Returns:
      str() of the function's return value.
    """
    values = [mapping[name] for name in self.args]
    return str(self.fun(*values))

163
# A constant definition line, `define NAME = VALUE;` (groups: name, value).
CONST_PATTERN = re.compile(r'^define\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
164 165
# A text macro line, `macro NAME(ARGS) = BODY;` (groups: name, args, body).
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
# A Python macro line, `python macro NAME(ARGS) = BODY;` (same groups).
PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
166

vitalyr@chromium.org's avatar
vitalyr@chromium.org committed
167

168
def ReadMacros(lines):
  """Parse the text of a macro file into constants and macros.

  Everything from the first '#' on a line is dropped as a comment, and
  blank lines are skipped. Every remaining line must match CONST_PATTERN,
  MACRO_PATTERN or PYTHON_MACRO_PATTERN.

  Args:
    lines: The complete text of the macro file.

  Returns:
    A (constants, macros) tuple. `constants` is a list of
    (compiled name pattern, value string) pairs; `macros` is a list of
    (compiled call pattern, TextMacro or PythonMacro) pairs.

  Raises:
    Error: if a non-empty line matches none of the three patterns.
  """
  constants = []
  macros = []
  for line in lines.split('\n'):
    comment_start = line.find('#')
    if comment_start != -1:
      line = line[:comment_start]
    line = line.strip()
    if not line:
      continue

    const_match = CONST_PATTERN.match(line)
    if const_match:
      name = const_match.group(1)
      value = const_match.group(2).strip()
      constants.append((re.compile("\\b%s\\b" % name), value))
      continue

    macro_match = MACRO_PATTERN.match(line)
    if macro_match:
      name = macro_match.group(1)
      args = [match.strip() for match in macro_match.group(2).split(',')]
      body = macro_match.group(3).strip()
      macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
      continue

    python_match = PYTHON_MACRO_PATTERN.match(line)
    if python_match:
      name = python_match.group(1)
      args = [match.strip() for match in python_match.group(2).split(',')]
      body = python_match.group(3).strip()
      # NOTE: the macro body is executed as Python code, so the macro file
      # must come from a trusted source.
      fun = eval("lambda " + ",".join(args) + ': ' + body)
      macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
      continue

    raise Error("Illegal line: " + line)
  return (constants, macros)

200

binji's avatar
binji committed
201
# A message template entry: a line starting with whitespace, then `T(Name,`
# where Name begins with an uppercase letter; group 1 is Name.
TEMPLATE_PATTERN = re.compile(r'^\s+T\(([A-Z][a-zA-Z0-9]*),')
202 203 204 205 206 207 208 209 210 211 212 213 214

def ReadMessageTemplates(lines):
  """Extract message template names and number them in order of appearance.

  Args:
    lines: The complete text of the message template file.

  Returns:
    A list of (compiled pattern, index) pairs. Each pattern matches the
    template name prefixed with "k" as a whole word; indices start at 0
    and count only lines that match TEMPLATE_PATTERN.
  """
  matches = (TEMPLATE_PATTERN.match(row) for row in lines.split('\n'))
  names = ["k%s" % found.group(1) for found in matches if found]
  return [(re.compile("\\b%s\\b" % name), position)
          for position, name in enumerate(names)]

215 216 217
# Start of an inline macro definition: `macro NAME(ARGS)` followed only by
# optional whitespace and a newline (groups: name, args).
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
# End of an inline macro definition: `endmacro`, optional whitespace, newline.
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')

218
def ExpandInlineMacros(lines):
  """Expand macros that are defined inside the source text itself.

  Each `macro NAME(ARGS)` ... `endmacro` definition is removed from the
  text, and calls of NAME that follow the definition's position are
  replaced by the definition's body with the arguments substituted.

  Args:
    lines: The source text to process.

  Returns:
    The text with all inline macro definitions removed and expanded.

  Raises:
    Error: if a macro definition has no matching `endmacro`.
  """
  def non_expander(s):
    # Arguments of inline macros are inserted as written.
    return s

  pos = 0
  while True:
    macro_match = INLINE_MACRO_PATTERN.search(lines, pos)
    if macro_match is None:
      # no more macros
      return lines
    name = macro_match.group(1)
    args = [match.strip() for match in macro_match.group(2).split(',')]
    end_macro_match = INLINE_MACRO_END_PATTERN.search(lines, macro_match.end())
    if end_macro_match is None:
      raise Error("Macro %s unclosed" % name)
    body = lines[macro_match.end():end_macro_match.start()]

    # remove macro definition
    lines = lines[:macro_match.start()] + lines[end_macro_match.end():]
    name_pattern = re.compile("\\b%s\\(" % name)
    macro = TextMacro(args, body)

    # advance position to where the macro definition was
    pos = macro_match.start()
    lines = ExpandMacroDefinition(lines, pos, name_pattern, macro, non_expander)
243

244

245
# An inline constant definition, `define NAME = VALUE;` directly followed by
# a newline (groups: name, value). VALUE may not contain ';' or a newline.
INLINE_CONSTANT_PATTERN = re.compile(r'define\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+);\n')
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265

def ExpandInlineConstants(lines):
  """Expand constants that are defined inside the source text itself.

  Each `define NAME = VALUE;` line is removed, and every whole-word
  occurrence of NAME after the definition is replaced by VALUE.

  Args:
    lines: The source text to process.

  Returns:
    The text with all inline constant definitions removed and expanded.
  """
  pos = 0
  while True:
    const_match = INLINE_CONSTANT_PATTERN.search(lines, pos)
    if const_match is None:
      # no more constants
      return lines
    name = const_match.group(1)
    replacement = const_match.group(2)
    name_pattern = re.compile("\\b%s\\b" % name)

    # remove constant definition and replace; the callable replacement
    # inserts the value verbatim (a string would have backslash sequences
    # interpreted by re).
    lines = (lines[:const_match.start()] +
             name_pattern.sub(lambda _match, text=replacement: text,
                              lines[const_match.end():]))

    # advance position to where the constant definition was
    pos = const_match.start()


266
# Skeleton of the generated C++ file. It is filled in with the %-formatting
# dictionary returned by BuildMetadata().
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""

318
# Declaration of the C array holding all sources; %s is the array body.
SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


323
# One name -> index lookup line of the generated GetIndex() function.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


328 329
# One index -> source slice line of the generated GetScriptSource() function.
GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


333
# One index -> script name line of the generated GetScriptName() function.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""

337

338
def BuildFilterChain(macro_filename, message_template_file):
  """Build the chain of filter functions to be applied to the sources.

  Args:
    macro_filename: Name of the macro file, if any.
    message_template_file: Name of the message template file, if any.

  Returns:
    A function (string -> string) that processes a source file by running
    it through every filter in order.
  """
  filter_chain = []

  if macro_filename:
    (consts, macros) = ReadMacros(ReadFile(macro_filename))
    filter_chain.append(lambda l: ExpandMacros(l, macros))
    filter_chain.append(lambda l: ExpandConstants(l, consts))

  if message_template_file:
    message_templates = ReadMessageTemplates(ReadFile(message_template_file))
    filter_chain.append(lambda l: ExpandConstants(l, message_templates))

  filter_chain.extend([
    RemoveCommentsAndTrailingWhitespace,
    ExpandInlineMacros,
    ExpandInlineConstants,
    Validate,
    jsmin.JavaScriptMinifier().JSMinify
  ])

  def apply_filters(text):
    # A plain loop avoids the builtin reduce(), which Python 3 lacks.
    for step in filter_chain:
      text = step(text)
    return text

  return apply_filters

371
def BuildExtraFilterChain():
  """Build the filter used for extras sources.

  Returns:
    A function (string -> string) that runs Validate() on the text and
    then RemoveCommentsAndTrailingWhitespace() on the result.
  """
  def apply_filters(text):
    return RemoveCommentsAndTrailingWhitespace(Validate(text))

  return apply_filters
373 374 375 376 377 378 379 380 381

class Sources:
  """Container for the prepared sources; holds three parallel lists."""

  def __init__(self):
    # names[i], modules[i] and is_debugger_id[i] describe the same source.
    self.names, self.modules, self.is_debugger_id = [], [], []


def IsDebuggerFile(filename):
  """Return True if the substring "debug" occurs anywhere in `filename`.

  The whole string is searched, so a directory component containing
  "debug" also counts.
  """
  return "debug" in filename
383 384 385 386

def IsMacroFile(filename):
  """Return True if `filename` ends with "macros.py"."""
  macro_suffix = "macros.py"
  return filename.endswith(macro_suffix)

387 388 389
def IsMessageTemplateFile(filename):
  """Return True if `filename` ends with "messages.h"."""
  template_suffix = "messages.h"
  return filename.endswith(template_suffix)

390

391
def PrepareSources(source_files, native_type, emit_js):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of JavaScript-ish source files. A file named macros.py
        will be treated as a list of macros, and a file named messages.h as
        the message template file. The list is not modified.
    native_type: String corresponding to a NativeType enum value, allowing us
        to treat different types of sources differently.
    emit_js: True if we should skip the byte conversion and just leave the
        sources as JS strings. (Not used by this function.)

  Returns:
    An instance of Sources.

  Raises:
    Error: if filtering one of the files fails; the message names the file.
  """
  # List comprehensions rather than filter(): on Python 3 filter() returns
  # an iterator, which has no len().
  macro_files = [f for f in source_files if IsMacroFile(f)]
  assert len(macro_files) in [0, 1]
  macro_file = macro_files[0] if macro_files else None

  message_template_files = [f for f in source_files
                            if IsMessageTemplateFile(f)]
  assert len(message_template_files) in [0, 1]
  message_template_file = (message_template_files[0]
                           if message_template_files else None)

  # Build a new list instead of removing entries from the caller's list.
  source_files = [f for f in source_files
                  if f != macro_file and f != message_template_file]

  if native_type in ("EXTRAS", "EXPERIMENTAL_EXTRAS"):
    filters = BuildExtraFilterChain()
  else:
    filters = BuildFilterChain(macro_file, message_template_file)

  # Sort 'debugger' sources first. The sort is stable, so the relative order
  # inside each group is kept.
  source_files = sorted(source_files, key=lambda f: not IsDebuggerFile(f))

  source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

  # Have a single not-quite-empty source file if there are none present;
  # otherwise you get errors trying to compile an empty C++ array.
  # It cannot be empty (or whitespace, which gets trimmed to empty), as
  # the deserialization code assumes each file is nonempty.
  if not source_files_and_contents:
    source_files_and_contents = [("dummy.js", "(function() {})")]

  result = Sources()

  for (source, contents) in source_files_and_contents:
    try:
      lines = filters(contents)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)
    result.is_debugger_id.append(IsDebuggerFile(source))

    # Drop the last three characters of the base name (the ".js" suffix).
    result.names.append(os.path.basename(source)[:-3])

  return result


457
def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only. any() is used because a
  # filter() object is always truthy on Python 3.
  assert not any(ord(value) >= 128 for value in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "source_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  assert offset == len(raw_sources)

  metadata = {
    "builtin_count": len(sources.modules),
    "debugger_count": sum(sources.is_debugger_id),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_script_source_cases": "".join(get_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata


509
def PutInt(blob_file, value):
  """Write a non-negative integer to `blob_file` in 1 to 4 bytes.

  The value is shifted left by two bits and the low two bits store the
  number of bytes used minus one; the bytes are written least significant
  first.

  Args:
    blob_file: Binary file-like object with a write() method.
    value: Integer in the range 0 <= value < 2**28.

  Raises:
    ValueError: if `value` is outside that range.
  """
  # Checked explicitly (not with assert) so that running with -O cannot
  # silently write a truncated value.
  if not 0 <= value < (1 << 28):
    raise ValueError("value out of range for PutInt: %r" % (value,))
  if value < 1 << 6:
    size = 1
  elif value < 1 << 14:
    size = 2
  elif value < 1 << 22:
    size = 3
  else:
    size = 4
  value_with_length = (value << 2) | (size - 1)

  byte_sequence = bytearray()
  for _ in range(size):
    byte_sequence.append(value_with_length & 255)
    value_with_length >>= 8
  blob_file.write(byte_sequence)


def PutStr(blob_file, value):
  """Write a length-prefixed string to `blob_file`.

  The length is written with PutInt() and is the number of bytes that
  follow.

  Args:
    blob_file: Binary file-like object with a write() method.
    value: The data to write. Text is encoded as UTF-8 first; bytes and
        bytearray objects are written unchanged.
  """
  if not isinstance(value, (bytes, bytearray)):
    # A binary file only accepts bytes, and the prefix must count bytes.
    value = value.encode("utf-8")
  PutInt(blob_file, len(value))
  blob_file.write(value)


def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob holds the number of debugger sources followed by a
  (name, source) pair for each of them, then the number of remaining
  sources followed by their (name, source) pairs.

  Args:
    sources: A Sources instance with the prepared sources. The debugger
        sources are expected to come first in its lists.
    startup_blob: Name of file to write the blob to.
  """
  debug_sources = sum(sources.is_debugger_id)
  # The with-statement closes the file even if writing fails part-way.
  with open(startup_blob, "wb") as output:
    PutInt(output, debug_sources)
    for i in range(debug_sources):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])

    PutInt(output, len(sources.names) - debug_sources)
    for i in range(debug_sources, len(sources.names)):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])


557 558
def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
  """Convert the given source files and write all requested outputs.

  Args:
    sources: List of source file names, passed to PrepareSources().
    target: Name of the main output file.
    native_type: The parameter for the NativesCollection template.
    raw_file: If set, name of a file that receives the concatenated
        processed sources.
    startup_blob: If set, name of a file that receives the startup blob.
    emit_js: If true, `target` receives the concatenated processed sources
        instead of the generated C++ code.
  """
  prepared_sources = PrepareSources(sources, native_type, emit_js)
  sources_output = "".join(prepared_sources.modules)
  metadata = BuildMetadata(prepared_sources, sources_output, native_type)

  # Optionally emit raw file.
  if raw_file:
    with open(raw_file, "w") as output:
      output.write(sources_output)

  if startup_blob:
    WriteStartupBlob(prepared_sources, startup_blob)

  # Emit resulting source file.
  with open(target, "w") as output:
    if emit_js:
      output.write(sources_output)
    else:
      output.write(HEADER_TEMPLATE % metadata)

579

580
def main():
  """Parse the command line and run the conversion.

  Positional arguments are the output file, the NativesCollection type and
  then any number of source files.
  """
  parser = optparse.OptionParser()
  parser.add_option("--raw",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob",
                    help="file to write the startup blob to.")
  parser.add_option("--js",
                    help="writes a JS file output instead of a C file",
                    action="store_true", default=False, dest='js')
  parser.add_option("--nojs", action="store_false", default=False, dest='js')
  parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources or macros.py.""")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Report a usage error instead of failing with an IndexError below.
    parser.error("expected at least the output file and the type")
  JS2C(args[2:],
       args[0],
       args[1],
       options.raw,
       options.startup_blob,
       options.js)
601

602 603 604

# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
  main()