Another ugly inlining hack, expanding the two PyDict_GetItem() calls
in LOAD_GLOBAL.  Besides saving a C function call, it skips the checks
that f_globals and f_builtins are dicts, and it extracts and tests the
string object's hash code only once.  We bail out of the
inlining if the name is not exactly a string, or when its hash is -1;
because of interning, neither should ever happen.  I believe interning
guarantees that the hash code is set, and I believe that the 'names'
tuple of a code object always contains interned strings, but I'm not
assuming that -- I'm simply testing hash != -1.

On my home machine, this makes a pystone variant with new-style
classes and slots run at the same speed as classic pystone!  (With
new-style classes but without slots, it is still a lot slower.)
diff --git a/Python/ceval.c b/Python/ceval.c
index 4f24bab..af9c072 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1709,13 +1709,37 @@
 
 		case LOAD_GLOBAL:
 			w = GETITEM(names, oparg);
+			if (PyString_CheckExact(w)) {
+				long hash = ((PyStringObject *)w)->ob_shash;
+				if (hash != -1) {
+					/* Inline the PyDict_GetItem() calls */
+					PyDictObject *d;
+					d = (PyDictObject *)(f->f_globals);
+					x = d->ma_lookup(d, w, hash)->me_value;
+					if (x != NULL) {
+						Py_INCREF(x);
+						PUSH(x);
+						continue;
+					}
+					d = (PyDictObject *)(f->f_builtins);
+					x = d->ma_lookup(d, w, hash)->me_value;
+					if (x != NULL) {
+						Py_INCREF(x);
+						PUSH(x);
+						continue;
+					}
+					goto load_global_error;
+				}
+			}
+			/* This is the un-inlined version of the code above */
 			x = PyDict_GetItem(f->f_globals, w);
 			if (x == NULL) {
 				x = PyDict_GetItem(f->f_builtins, w);
 				if (x == NULL) {
+				  load_global_error:
 					format_exc_check_arg(
 						    PyExc_NameError,
-						    GLOBAL_NAME_ERROR_MSG ,w);
+						    GLOBAL_NAME_ERROR_MSG, w);
 					break;
 				}
 			}