#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This is a utility for converting JavaScript source code into C-style
# char arrays. It is used for embedded JavaScript code in the V8
# library.

34
import os, re
35
import optparse
36
import textwrap
37 38


39 40 41
class Error(Exception):
  """Exception raised by this script when a source file is rejected."""

  def __init__(self, msg):
    super(Error, self).__init__(msg)
42 43


44
def ToCArray(byte_sequence):
  """Render a byte sequence as the body of a C array initializer.

  Args:
    byte_sequence: A string, or a bytes-like object, whose elements are
        emitted as decimal character codes.

  Returns:
    The decimal codes joined by ", " and wrapped to at most 80 columns.
  """
  # Iterating bytes/bytearray may yield ints rather than one-character
  # strings; ints are used as-is, everything else goes through ord().
  codes = [str(c if isinstance(c, int) else ord(c)) for c in byte_sequence]
  return textwrap.fill(", ".join(codes), 80)
50 51


52 53
def RemoveCommentsEmptyLinesAndWhitespace(lines):
  """Strip comments, empty lines and line-edge whitespace from source text.

  The processing is purely textual (regular expressions), so a "//" or
  "/* ... */" sequence inside a string literal is treated as a comment too.

  Args:
    lines: The complete source text as a single string.

  Returns:
    The reduced source text.
  """
  block_comment = re.compile(r'/\*.*?\*/', re.DOTALL)

  lines = re.sub(r'\n+', '\n', lines)  # empty lines
  lines = re.sub(r'//.*\n', '\n', lines)  # end-of-line comments
  lines = block_comment.sub('', lines)  # block comments
  lines = re.sub(r'\s+\n', '\n', lines)  # trailing whitespace
  lines = re.sub(r'\n\s+', '\n', lines)  # initial whitespace
  return lines


61 62 63 64 65 66 67 68 69
def ReadFile(filename):
  """Return the entire contents of the text file named `filename`."""
  with open(filename, "rt") as source_file:
    return source_file.read()


70 71
# Patterns used by Validate() to detect constructs that are rejected in
# natives files.
# A call to eval(...) / a with(...) statement.
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')
# A Make*Error( call (other than MakeGeneric*) whose first argument is an
# identifier starting with 'k' or an upper-case letter; group 1 captures
# that identifier.
INVALID_ERROR_MESSAGE_PATTERN = re.compile(
    r'Make(?!Generic)\w*Error\(([kA-Z]\w+)')
# A "new $...Error(" construction that has at least one argument.
NEW_ERROR_PATTERN = re.compile(r'new \$\w*Error\((?!\))')
75

76
def Validate(lines):
  """Check source text for constructs that are not allowed in natives.

  Args:
    lines: The source text to check, as a single string.

  Returns:
    The input text, unchanged.

  Raises:
    Error: If the text matches EVAL_PATTERN, WITH_PATTERN,
        INVALID_ERROR_MESSAGE_PATTERN or NEW_ERROR_PATTERN.
  """
  # Because of simplified context setup, eval and with are not
  # allowed in the natives files.
  if EVAL_PATTERN.search(lines):
    raise Error("Eval disallowed in natives.")
  if WITH_PATTERN.search(lines):
    raise Error("With statements disallowed in natives.")

  invalid_error = INVALID_ERROR_MESSAGE_PATTERN.search(lines)
  if invalid_error:
    raise Error("Unknown error message template '%s'" % invalid_error.group(1))
  if NEW_ERROR_PATTERN.search(lines):
    raise Error("Error constructed without message template.")

  # Pass lines through unchanged.
  return lines
90 91


92
def ExpandConstants(lines, constants):
  """Substitute each constant pattern in the text with its value.

  Args:
    lines: The source text to process, as a single string.
    constants: Iterable of (pattern, value) pairs, where pattern is a
        compiled regular expression and value is converted with str().

  Returns:
    The text after all substitutions have been applied in order.
  """
  for pattern, value in constants:
    # str(value) is passed as a regex replacement template, so any
    # backslash escapes it contains are interpreted by the re module.
    lines = pattern.sub(str(value), lines)
  return lines

97

98
# Template for the generated C++ file. It is filled in with the dictionary
# returned by BuildMetadata(). A trailing backslash after a placeholder
# suppresses the newline, since the substituted text ends with its own.
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/snapshot/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptSource(int index) {
%(get_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const char>(sources, %(total_length)i);
  }
}  // internal
}  // v8
"""

145
# Declaration of the C array that holds the concatenated sources; %s is
# the comma-separated list of byte values.
SOURCES_DECLARATION = """\
  static const char sources[] = { %s };
"""


# One line of NativesCollection::GetIndex per native.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One line of NativesCollection::GetScriptSource per native.
GET_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(sources + %(offset)i, %(source_length)i);
"""


# One line of NativesCollection::GetScriptName per native.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""

164

165
def BuildFilterChain():
  """Build the chain of filter functions to be applied to the sources.

  Returns:
    A function (string -> string) that processes a source file.
  """
  filter_chain = (
    RemoveCommentsEmptyLinesAndWhitespace,
    Validate,
  )

  def apply_filters(source):
    # Run the filters in listed order, feeding each the previous result.
    # An explicit loop avoids relying on the builtin reduce(), which does
    # not exist on Python 3.
    for source_filter in filter_chain:
      source = source_filter(source)
    return source

  return apply_filters

181
def BuildExtraFilterChain():
  """Build the filter function that validates first and strips afterwards.

  Returns:
    A function (string -> string) that passes its argument through
    Validate and then RemoveCommentsEmptyLinesAndWhitespace.
  """
  def extra_filter(source):
    return RemoveCommentsEmptyLinesAndWhitespace(Validate(source))

  return extra_filter
183 184 185 186 187 188

class Sources:
  """Holds the prepared sources as two parallel lists."""

  def __init__(self):
    # modules[i] is the processed source text, names[i] the matching name.
    self.modules = []
    self.names = []

189
def PrepareSources(source_files, native_type, emit_js):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of JavaScript-ish source files.
    native_type: String corresponding to a NativeType enum value, allowing us
        to treat different types of sources differently.
    emit_js: True if we should skip the byte conversion and just leave the
        sources as JS strings.

  Returns:
    An instance of Sources.

  Raises:
    Error: If a filter rejects a file; the message names the file.
  """
  filters = BuildFilterChain()

  source_files_and_contents = [(f, ReadFile(f)) for f in source_files]

  # Have a single not-quite-empty source file if there are none present;
  # otherwise you get errors trying to compile an empty C++ array.
  # It cannot be empty (or whitespace, which gets trimmed to empty), as
  # the deserialization code assumes each file is nonempty.
  if not source_files_and_contents:
    source_files_and_contents = [("dummy.js", "(function() {})")]

  result = Sources()

  for (source, contents) in source_files_and_contents:
    try:
      lines = filters(contents)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)

    # Drops the last three characters of the file name; this assumes a
    # three-character ".js" extension.
    name = os.path.basename(source)[:-3]
    result.names.append(name)

  return result


229
def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only. all() is used rather than
  # testing the truthiness of filter(), whose result is always truthy on
  # Python 3.
  assert all(ord(value) < 128 for value in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "source_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_script_source_cases.append(GET_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  assert offset == len(raw_sources)

  metadata = {
    "builtin_count": len(sources.modules),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_script_source_cases": "".join(get_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata


280
def PutInt(blob_file, value):
  """Write a non-negative integer to blob_file using 1 to 4 bytes.

  The value is shifted left by two bits and the two low bits store the
  number of bytes used minus one. The result is written least significant
  byte first.

  Args:
    blob_file: File-like object whose write() accepts a bytearray.
    value: Integer in the range [0, 2**28).
  """
  assert 0 <= value < (1 << 28)
  if value < (1 << 6):
    size = 1
  elif value < (1 << 14):
    size = 2
  elif value < (1 << 22):
    size = 3
  else:
    size = 4
  value_with_length = (value << 2) | (size - 1)

  byte_sequence = bytearray(
      (value_with_length >> (8 * i)) & 255 for i in range(size))
  blob_file.write(byte_sequence)


def PutStr(blob_file, value):
  """Write a length-prefixed string to blob_file.

  The length is written with PutInt, followed by the string itself.

  Args:
    blob_file: Binary file-like object to write to.
    value: The string to write. Text that is not already a byte string is
        encoded as UTF-8 first, so the length prefix counts bytes.
  """
  if not isinstance(value, (bytes, bytearray)):
    # A binary file rejects text on Python 3; on Python 2 a plain str is
    # already bytes and is written unchanged.
    value = value.encode("utf-8")
  PutInt(blob_file, len(value))
  blob_file.write(value)


def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob holds the number of sources followed by each (name, module)
  pair as length-prefixed strings.

  Args:
    sources: A Sources instance with the prepared sources.
    startup_blob: Name of file to write the blob to.
  """
  # The with-block closes the file even if writing fails part-way.
  with open(startup_blob, "wb") as output:
    PutInt(output, len(sources.names))
    for i, name in enumerate(sources.names):
      PutStr(output, name)
      PutStr(output, sources.modules[i])


322 323
def JS2C(sources, target, native_type, raw_file, startup_blob, emit_js):
  """Convert the given source files and write all requested outputs.

  Args:
    sources: List of source file names.
    target: Name of the main output file.
    native_type: The parameter for the NativesCollection template.
    raw_file: If set, name of a file that receives the processed sources.
    startup_blob: If set, name of a file that receives the startup blob.
    emit_js: If true, write the processed sources to target instead of
        the text generated from HEADER_TEMPLATE.
  """
  prepared_sources = PrepareSources(sources, native_type, emit_js)
  sources_output = "".join(prepared_sources.modules)
  metadata = BuildMetadata(prepared_sources, sources_output, native_type)

  # Optionally emit raw file.
  if raw_file:
    with open(raw_file, "w") as output:
      output.write(sources_output)

  if startup_blob:
    WriteStartupBlob(prepared_sources, startup_blob)

  # Emit resulting source file.
  with open(target, "w") as output:
    if emit_js:
      output.write(sources_output)
    else:
      output.write(HEADER_TEMPLATE % metadata)

344

345
def main():
  """Parse the command line and run the conversion."""
  parser = optparse.OptionParser()
  parser.add_option("--raw",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob",
                    help="file to write the startup blob to.")
  parser.add_option("--js",
                    help="writes a JS file output instead of a C file",
                    action="store_true", default=False, dest='js')
  parser.add_option("--nojs", action="store_false", default=False, dest='js')
  parser.set_usage("""js2c out.cc type sources.js ...
        out.cc: C code to be generated.
        type: type parameter for NativesCollection template.
        sources.js: JS internal sources.""")
  (options, args) = parser.parse_args()
  if len(args) < 2:
    # Report a usage error instead of failing with an IndexError below.
    parser.error("expected at least the output file and the type")
  JS2C(args[2:],
       args[0],
       args[1],
       options.raw,
       options.startup_blob,
       options.js)
366

367 368 369

# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
  main()