", false, null, false,
printResults, 0, module.getScopeInfo(node), cflags);
module.mainCode = main;
}
// Error message fragments required by loadPyBytecode. They are concatenated to build the text
// of the RuntimeException thrown when oversized code cannot be replaced by CPython bytecode.
// All are constants, hence final.

/** Format taking the command that was executed and the reason it failed. */
private static final String TRIED_CREATE_PYC_MSG =
        "\nJython tried to create a pyc-file by executing\n %s\nwhich failed because %s";

/** Format taking the name of the offending source file. */
private static final String LARGE_METHOD_MSG = "Module or method too large in `%s`.";

/** Format taking the name of the source file the user should compile with CPython. */
private static final String PLEASE_PROVIDE_MSG =
        "\n\nPlease provide a CPython 2.7 bytecode file (.pyc), e.g. run"
                + "\n python -m py_compile %s";

/** Plain text (no format arguments) explaining how to set the PYTHON_CPYTHON property. */
private static final String CPYTHON_CMD_MSG =
        "\n\nAlternatively, specify a CPython 2.7 command via the " //
                + PYTHON_CPYTHON + " property, e.g.:" //
                + "\n jython -D" + PYTHON_CPYTHON + "=python" //
                + "\nor (e.g. for pip) through the environment variable JYTHON_OPTS:" //
                + "\n export JYTHON_OPTS=\"-D" + PYTHON_CPYTHON
                + "=python\"\n";
/**
 * Load the CPython 2.7 bytecode that corresponds to a Python source file.
 * <p>
 * The bytecode is expected in the file {@code filename + "c"}. If that file exists, its 8-byte
 * header is read, the magic number is checked against 62211 and the remainder is unmarshalled.
 * If it does not exist, and {@code try_cpython} is true and the {@code PYTHON_CPYTHON} system
 * property names a command, that command is first run with {@code --version} (the reported
 * version must start with "Python 2.7.") and then with {@code -m py_compile filename}; on
 * success this method calls itself once more with {@code try_cpython == false}.
 * <p>
 * A {@code filename} beginning with {@code ClasspathPyImporter.PYCLASSPATH_PREFIX} is first
 * resolved to a path through the class loader.
 *
 * @param filename of the Python source file (the {@code .pyc} name is derived from it)
 * @param try_cpython whether launching an external CPython to create the {@code .pyc} is allowed
 * @return the code object read from the {@code .pyc} file
 * @throws RuntimeException if no valid CPython 2.7 bytecode can be found or created; the
 *             message explains what the user may do about it
 */
private static PyBytecode loadPyBytecode(String filename, boolean try_cpython)
        throws RuntimeException {
    if (filename.startsWith(ClasspathPyImporter.PYCLASSPATH_PREFIX)) {
        ClassLoader cld = Py.getSystemState().getClassLoader();
        if (cld == null) {
            cld = imp.getParentClassLoader();
        }
        URL py_url =
                cld.getResource(filename.replace(ClasspathPyImporter.PYCLASSPATH_PREFIX, ""));
        if (py_url == null) {
            // Should never happen, but let's play it safe and treat this case.
            throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename)
                    + "but couldn't resolve that filename within classpath.\n"
                    + "Make sure the source file is at a proper location.");
        }
        filename = py_url.getPath();
    }

    String pyc_filename = filename + "c";
    File pyc_file = new File(pyc_filename);

    if (pyc_file.exists()) {
        PyFile f = new PyFile(pyc_filename, "rb", 4096);
        PyObject code;
        try {
            // Header: bytes 0-1 hold the little-endian magic number, bytes 4-7 the mtime
            // (the mtime is not checked here).
            byte[] bts = f.read(8).toBytes();
            int magic = -1; // stays invalid if the file is too short to hold a header
            if (bts.length >= 8) {
                magic = (bts[1] << 8) & 0x0000FF00 | (bts[0] << 0) & 0x000000FF;
            }
            if (magic != 62211) { // check Python 2.7 bytecode
                throw new RuntimeException(
                        String.format(LARGE_METHOD_MSG, filename) //
                                + "\n'" + pyc_filename + "' is not CPython 2.7 bytecode." //
                                + String.format(PLEASE_PROVIDE_MSG, filename));
            }
            _marshal.Unmarshaller un = new _marshal.Unmarshaller(f);
            code = un.load();
        } finally {
            // Release the file on every path, including a bad header or unmarshalling error.
            f.close();
        }
        if (code instanceof PyBytecode) {
            return (PyBytecode) code;
        }
        throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename) //
                + "\n'" + pyc_filename + "' contains invalid bytecode."
                + String.format(PLEASE_PROVIDE_MSG, filename));
    }

    String CPython_command = System.getProperty(PYTHON_CPYTHON);
    if (!try_cpython || CPython_command == null) {
        throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename)
                + String.format(PLEASE_PROVIDE_MSG, filename) + CPYTHON_CMD_MSG);
    }

    // NOTE(review): Runtime.exec(String) splits the command on whitespace, so a filename
    // containing spaces will be mis-tokenised. Left as is because the property value may
    // itself legitimately contain arguments -- confirm before changing.
    String command_ver = CPython_command + " --version";
    String command = CPython_command + " -m py_compile " + filename;
    Exception exc = null;
    int result = 0;
    String reason;

    // check version...
    try {
        Process p = Runtime.getRuntime().exec(command_ver);
        // Python 2.7 writes version to error-stream for some reason:
        String cp_version = firstLineOf(p.getErrorStream());
        if (cp_version == null) {
            // Also try input-stream as fallback, just in case...
            cp_version = firstLineOf(p.getInputStream());
        }
        result = p.waitFor();
        if (cp_version == null || !cp_version.startsWith("Python 2.7.")) {
            // The command may have printed nothing at all: report that instead of an NPE.
            reason = (cp_version != null ? cp_version : "an unidentified version")
                    + " has been provided, but 2.7.x is required.";
            throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename)
                    + String.format(TRIED_CREATE_PYC_MSG, command, reason)
                    + String.format(PLEASE_PROVIDE_MSG, filename) + CPYTHON_CMD_MSG);
        }
    } catch (InterruptedException | IOException e) {
        if (e instanceof InterruptedException) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
        }
        exc = e;
    }

    if (exc == null && result == 0) {
        try {
            Process p = Runtime.getRuntime().exec(command);
            result = p.waitFor();
            if (result == 0) {
                // The .pyc should exist now; do not launch CPython a second time.
                return loadPyBytecode(filename, false);
            }
        } catch (InterruptedException | IOException e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            exc = e;
        }
    }

    reason = exc != null ? "of " + exc.toString() : "of a bad return: " + result;
    String exc_msg = String.format(LARGE_METHOD_MSG, filename)
            + String.format(TRIED_CREATE_PYC_MSG, command, reason)
            + String.format(PLEASE_PROVIDE_MSG, filename) + CPYTHON_CMD_MSG;
    throw exc != null ? new RuntimeException(exc_msg, exc) : new RuntimeException(exc_msg);
}

/**
 * Return the first line available from a stream, then read and discard the rest and close it.
 *
 * @param stream to read (closed on return, even if reading fails)
 * @return the first line, or {@code null} if the stream produced no line at all
 * @throws IOException if reading fails
 */
private static String firstLineOf(java.io.InputStream stream) throws IOException {
    try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) {
        String first = br.readLine();
        while (br.readLine() != null) {
            // drain remaining output
        }
        return first;
    }
}
/**
 * Turn a {@code PyBytecode} object into text: the object is written with Java serialization
 * and the resulting bytes are encoded by {@link #base64encodeToString(byte[])}.
 * <p>
 * Java serialization is used because a round trip through {@code _marshal.Marshaller} and
 * {@code _marshal.Unmarshaller} over a {@code cStringIO.StringIO} buffer was found to fail
 * with "ValueError: bad marshal data". The cause may be cStringIO rather than marshal itself,
 * since marshalled bytecode contains bytes that are not directly suitable as string values.
 *
 * @param btcode the code object to serialize
 * @return the serialized form as unpadded base64 text
 * @throws java.io.IOException if serialization fails
 */
private static String serializePyBytecode(PyBytecode btcode) throws java.io.IOException {
    ByteArrayOutputStream byteSink = new ByteArrayOutputStream();
    ObjectOutputStream objectSink = new ObjectOutputStream(byteSink);

    // Serialize the object and make sure everything has reached the byte buffer.
    objectSink.writeObject(btcode);
    objectSink.flush();
    byte[] serialized = byteSink.toByteArray();

    objectSink.close();
    byteSink.close();

    // From Java 8 one could use Base64.getEncoder().encodeToString(serialized) instead.
    return base64encodeToString(serialized);
}
/**
 * Encode bytes as base64 text in the reduced form understood by the decoder in
 * BytecodeLoader: neither '=' padding nor line-breaks are produced. The result equals
 * {@code binascii.b2a_base64(bytes).rstrip('=\n')}.
 *
 * @param data to encode
 * @return the string encoding the data
 */
private static String base64encodeToString(byte[] data) {

    final int length = data.length;
    final int remainder = length % 3;
    final int wholeEnd = length - remainder; // index just past the last complete 3-byte group

    StringBuilder out = new StringBuilder(((length / 3) + 1) * 4);

    // Each complete group of three bytes yields four characters.
    for (int i = 0; i < wholeEnd; i += 3) {
        int word = ((data[i] & 0xff) << 16) | ((data[i + 1] & 0xff) << 8) | (data[i + 2] & 0xff);
        out.append(base64enc[word >> 18]);
        out.append(base64enc[(word >> 12) & 0x3f]);
        out.append(base64enc[(word >> 6) & 0x3f]);
        out.append(base64enc[word & 0x3f]);
    }

    // One or two left-over bytes yield two or three characters respectively.
    if (remainder > 0) {
        final boolean twoLeft = (remainder == 2);
        int word = (data[wholeEnd] & 0xff) << 8;
        if (twoLeft) {
            word |= data[wholeEnd + 1] & 0xff;
        }
        out.append(base64enc[word >> 10]);
        out.append(base64enc[(word >> 4) & 0x3f]);
        if (twoLeft) {
            // The last four bits, padded on the right with two zero bits.
            out.append(base64enc[(word << 2) & 0x3f]);
        }
    }

    return out.toString();
}

/** The 64 output characters used by {@link #base64encodeToString(byte[])}, indexed by value. */
private static final char[] base64enc =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
/** Largest number of characters stored in a single String literal by this class. */
private static final int maxLiteral = 65535;

/**
 * Store Base64 encoded Python byte code in the class file as one or more String literals.
 * <p>
 * Java String objects are limited only by the address range of arrays, but the class file
 * standard only supports literals representable in at most 65535 bytes of modified UTF-8. This
 * method is used only with base64 Strings (therefore ASCII without nulls), so each character
 * occupies exactly 1 byte in the class file after encoding to UTF-8.
 * <p>
 * To stay within that limit, {@code code_str} is divided where necessary and the pieces are
 * named as follows. The marker-interface 'ContainsPyBytecode' indicates that a class contains
 * (static final) literals of this scheme:
 * <ul>
 * <li>a prefix of '___' indicates a bytecode-containing string literal;</li>
 * <li>a number indicating the number of parts follows;</li>
 * <li>'0_' indicates that no splitting occurred;</li>
 * <li>otherwise another number follows, naming the index of the literal;</li>
 * <li>indexing starts at 0.</li>
 * </ul>
 * Examples:
 * <ul>
 * <li>{@code ___0_method1} contains bytecode for method1</li>
 * <li>{@code ___2_0_method2} contains first part of method2's bytecode</li>
 * <li>{@code ___2_1_method2} contains second part of method2's bytecode</li>
 * </ul>
 * Note that this approach is provisional. In future, Jython might contain the bytecode directly
 * as bytecode-objects. The current approach was feasible with far less complicated JVM
 * bytecode-manipulation, but needs special treatment after class-loading.
 *
 * @param name of the method or function being generated
 * @param code_str Base64 encoded CPython byte code
 * @param module currently being defined as a class file
 * @throws java.io.IOException
 */
private static void insert_code_str_to_classfile(String name, String code_str, Module module)
        throws java.io.IOException {
    final int length = code_str.length();

    if (length <= maxLiteral) {
        // Small enough for a single literal.
        module.classfile.addFinalStringLiteral("___0_" + name, code_str);
        return;
    }

    // Number of literals needed: length / maxLiteral, rounded up.
    final int parts = length / maxLiteral + (length % maxLiteral > 0 ? 1 : 0);
    final String prefix = "___" + parts + "_";

    for (int index = 0; index < parts; index++) {
        int from = index * maxLiteral;
        // Every piece is full-sized except possibly the last.
        int to = Math.min(from + maxLiteral, length);
        module.classfile.addFinalStringLiteral(prefix + index + "_" + name,
                code_str.substring(from, to));
    }
}
/**
 * Create and write a Python module as a Java class file.
 * <p>
 * The module is first compiled normally. If that raises {@code MethodTooLargeException},
 * CPython 2.7 bytecode for the same source is obtained through {@code loadPyBytecode} and the
 * compilation is repeated with the oversized code objects embedded in the class file as
 * serialized CPython bytecode. Which code objects count as oversized is decided by a threshold
 * on the length of their CPython bytecode, starting at 22000 and lowered by 1000 after each
 * failed attempt; once it has dropped below 10000 the method gives up.
 *
 * @param node AST of the module to write
 * @param ostream stream onto which to write it
 * @param name passed to the {@code Module} constructor
 * @param filename passed to the {@code Module} constructor; also used to locate the
 *            {@code .pyc} file if the fallback is needed
 * @param linenumbers passed to the {@code Module} constructor
 * @param printResults passed on to module initialisation
 * @param cflags passed on to module initialisation
 * @param mtime passed to the {@code Module} constructor
 * @throws RuntimeException if the module is too large and the CPython bytecode fallback is
 *             unavailable or does not resolve the problem
 * @throws Exception
 */
public static void compile(mod node, OutputStream ostream, String name, String filename,
        boolean linenumbers, boolean printResults, CompilerFlags cflags, long mtime)
        throws Exception {

    try {
        Module module = new Module(name, filename, linenumbers, mtime);
        _module_init(node, module, printResults, cflags);
        module.write(ostream);
    } catch (MethodTooLargeException re) {
        PyBytecode btcode = loadPyBytecode(filename, true);
        int thresh = 22000;
        /*
         * No idea, how to determine at this point if a method is oversized, so we just try a
         * threshold regarding Python code-length, while JVM restriction is actually about Java
         * bytecode length. Anyway; given that code-lengths are strongly related, this should
         * work well enough.
         */
        while (true) {
            try {
                List<PyBytecode> largest_m_codes = new ArrayList<>();
                Stack<PyBytecode> buffer = new Stack<>();
                buffer.push(btcode);
                /*
                 * Walk the tree of code objects. Probably this cannot yield cycles, so no
                 * record of visited code objects is kept for now.
                 */
                while (!buffer.isEmpty()) {
                    PyBytecode bcode = buffer.pop();
                    if (bcode.co_code.length > thresh) {
                        /*
                         * If a function needs to be represented as CPython bytecode, we create
                         * all inner PyCode-items (classes, functions, methods) also as CPython
                         * bytecode implicitly, so no need to look at them individually. Maybe
                         * we can later optimize this such that inner methods can be
                         * JVM-bytecode as well (if not oversized themselves).
                         */
                        largest_m_codes.add(bcode);
                    } else {
                        for (PyObject item : bcode.co_consts) {
                            if (item instanceof PyBytecode) {
                                buffer.push((PyBytecode) item);
                            }
                        }
                    }
                }

                Module module = new Module(name, filename, linenumbers, mtime);
                module.oversized_methods = new Hashtable<>(largest_m_codes.size());
                int ov_id = 0;

                for (PyBytecode largest_m_code : largest_m_codes) {
                    // Name of the literal(s): must be usable as a Java identifier.
                    String name_id;
                    if (!PyCodeConstant.isJavaIdentifier(largest_m_code.co_name)) {
                        name_id = "f$_" + ov_id++;
                    } else {
                        name_id = largest_m_code.co_name + "$_" + ov_id++;
                    }

                    // Key under which the replacement is registered: code name + first line.
                    if (largest_m_code.co_name.equals("<module>")) {
                        /*
                         * In Jython's opinion module begins at line 0 (while CPython reports
                         * line 1)
                         */
                        module.oversized_methods.put(largest_m_code.co_name + 0, name_id);
                    } else {
                        module.oversized_methods.put(
                                largest_m_code.co_name + largest_m_code.co_firstlineno,
                                name_id);
                    }

                    String code_str = serializePyBytecode(largest_m_code);
                    insert_code_str_to_classfile(name_id, code_str, module);
                }

                module.classfile.addInterface(p(org.python.core.ContainsPyBytecode.class));
                _module_init(node, module, printResults, cflags);
                module.write(ostream);
                break;
            } catch (MethodTooLargeException e) {
                // Still too large: treat more code objects as oversized on the next attempt.
                thresh -= 1000;
            }
            if (thresh < 10000) {
                /*
                 * This value should be well feasible by JVM-bytecode, so something else must be
                 * wrong.
                 */
                throw new RuntimeException(
                        "For unknown reason, too large method code couldn't be resolved"
                                + "\nby PyBytecode-approach:\n" + filename);
            }
        }
    }
}
/**
 * Emit code that loads the constant for a numeric literal. The value held by the node selects
 * the kind of constant: {@code PyInteger}, {@code PyLong} (via its string form),
 * {@code PyFloat} or {@code PyComplex} (via its imaginary part). Any other value emits nothing.
 *
 * @param node the numeric literal
 * @param code to which the load is appended
 * @throws Exception
 */
public void emitNum(Num node, Code code) throws Exception {
    final Object value = node.getInternalN();
    if (value instanceof PyInteger) {
        integerConstant(((PyInteger) value).getValue()).get(code);
    } else if (value instanceof PyLong) {
        String digits = ((PyObject) value).__str__().toString();
        longConstant(digits).get(code);
    } else if (value instanceof PyFloat) {
        floatConstant(((PyFloat) value).getValue()).get(code);
    } else if (value instanceof PyComplex) {
        complexConstant(((PyComplex) value).imag).get(code);
    }
}
/**
 * Emit code that loads the constant for a string literal: a unicode constant when the node
 * holds a {@code PyUnicode}, otherwise a (byte) string constant.
 *
 * @param node the string literal
 * @param code to which the load is appended
 * @throws Exception
 */
public void emitStr(Str node, Code code) throws Exception {
    PyString literal = (PyString) node.getInternalS();
    String text = literal.asString();
    if (!(literal instanceof PyUnicode)) {
        stringConstant(text).get(code);
        return;
    }
    unicodeConstant(text).get(code);
}
/**
 * Emit code that builds a {@code PyObject[]} from a long list of literals by means of
 * generated helper methods, rather than in-line.
 * <p>
 * Nothing is emitted, and {@code false} is returned, if the list has fewer than
 * {@code USE_SETTERS_LIMIT} elements or contains anything other than {@code Num} and
 * {@code Str} nodes. Otherwise code is emitted that creates an array of the right length and
 * passes it in turn to private static methods named {@code set$$<k>}, added to the class file
 * here, each of which stores up to {@code MAX_SETTINGS_PER_SETTER} elements.
 *
 * @param nodes the elements of the array
 * @param code to which the array creation and setter calls are appended
 * @return {@code true} if the array-building code was emitted, {@code false} if the caller
 *         has to deal with the elements itself
 * @throws Exception
 */
public boolean emitPrimitiveArraySetters(java.util.List<? extends PythonTree> nodes, Code code)
        throws Exception {
    final int n = nodes.size();
    if (n < USE_SETTERS_LIMIT) {
        return false; // Too small to matter, so bail
    }

    // Only attempt if all nodes are either Num or Str, otherwise bail at the first offender
    for (PythonTree node : nodes) {
        if (!(node instanceof Num || node instanceof Str)) {
            return false;
        }
    }

    // Round up, so that no empty setter is generated when n is an exact multiple of the limit.
    final int num_setters = (n + MAX_SETTINGS_PER_SETTER - 1) / MAX_SETTINGS_PER_SETTER;

    code.iconst(n);
    code.anewarray(p(PyObject.class));

    for (int i = 0; i < num_setters; i++) {
        final String setterName = "set$$" + setter_count;
        Code setter = this.classfile.addMethod(setterName, sig(Void.TYPE, PyObject[].class),
                ACC_STATIC | ACC_PRIVATE);

        final int first = i * MAX_SETTINGS_PER_SETTER;
        final int limit = Math.min(first + MAX_SETTINGS_PER_SETTER, n);
        for (int index = first; index < limit; index++) {
            // array[index] = <literal>
            setter.aload(0);
            setter.iconst(index);
            PythonTree node = nodes.get(index);
            if (node instanceof Num) {
                emitNum((Num) node, setter);
            } else if (node instanceof Str) {
                emitStr((Str) node, setter);
            }
            setter.aastore();
        }
        setter.return_();

        // Call the setter on a copy of the array reference, leaving the original on the stack.
        code.dup();
        code.invokestatic(this.classfile.name, setterName, sig(Void.TYPE, PyObject[].class));
        setter_count++;
    }
    return true;
}
}