diff options
Diffstat (limited to 'applications/core/lib/Zend/Pdf/FileParser.php')
| -rw-r--r-- | applications/core/lib/Zend/Pdf/FileParser.php | 483 |
1 files changed, 483 insertions, 0 deletions
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @package    Zend_Pdf
 * @subpackage FileParser
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

/**
 * Abstract utility class for parsing binary files.
 *
 * Provides a library of methods to quickly navigate and extract various data
 * types (signed and unsigned integers, floating- and fixed-point numbers,
 * strings, etc.) from the file.
 *
 * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
 * This allows the same parser code to work with many different data sources:
 * in-memory objects, filesystem files, etc.
 *
 * @package    Zend_Pdf
 * @subpackage FileParser
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Pdf_FileParser
{
  /**** Class Constants ****/

    /**
     * Little-endian byte order (0x04 0x03 0x02 0x01).
     */
    const BYTE_ORDER_LITTLE_ENDIAN = 0;

    /**
     * Big-endian byte order (0x01 0x02 0x03 0x04).
     */
    const BYTE_ORDER_BIG_ENDIAN    = 1;



  /**** Instance Variables ****/


    /**
     * Flag indicating that the file has passed a cursory validation check.
     * @var boolean
     */
    protected $_isScreened = false;

    /**
     * Flag indicating that the file has been successfully parsed.
     * @var boolean
     */
    protected $_isParsed = false;

    /**
     * Object representing the data source to be parsed.
     * @var Zend_Pdf_FileParserDataSource
     */
    protected $_dataSource = null;



  /**** Public Interface ****/


  /* Abstract Methods */

    /**
     * Performs a cursory check to verify that the binary file is in the expected
     * format. Intended to quickly weed out obviously bogus files.
     *
     * Must set $this->_isScreened to true if successful.
     *
     * @throws Zend_Pdf_Exception
     */
    abstract public function screen();

    /**
     * Reads and parses the complete binary file.
     *
     * Must set $this->_isParsed to true if successful.
     *
     * @throws Zend_Pdf_Exception
     */
    abstract public function parse();


  /* Object Lifecycle */

    /**
     * Object constructor.
     *
     * Verifies that the data source has been properly initialized: a data
     * source reporting a size of zero is rejected.
     *
     * @param Zend_Pdf_FileParserDataSource $dataSource
     * @throws Zend_Pdf_Exception
     */
    public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
    {
        if ($dataSource->getSize() == 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('The data source has not been properly initialized',
                                         Zend_Pdf_Exception::BAD_DATA_SOURCE);
        }
        $this->_dataSource = $dataSource;
    }

    /**
     * Object destructor.
     *
     * Discards the data source object.
     */
    public function __destruct()
    {
        $this->_dataSource = null;
    }


  /* Accessors */

    /**
     * Returns true if the file has passed a cursory validation check.
     *
     * @return boolean
     */
    public function isScreened()
    {
        return $this->_isScreened;
    }

    /**
     * Returns true if the file has been successfully parsed.
     *
     * @return boolean
     */
    public function isParsed()
    {
        return $this->_isParsed;
    }

    /**
     * Returns the data source object representing the file being parsed.
     *
     * @return Zend_Pdf_FileParserDataSource
     */
    public function getDataSource()
    {
        return $this->_dataSource;
    }


  /* Primitive Methods */

    /**
     * Convenience wrapper for the data source object's moveToOffset() method.
     *
     * @param integer $offset Destination byte offset.
     * @throws Zend_Pdf_Exception
     */
    public function moveToOffset($offset)
    {
        $this->_dataSource->moveToOffset($offset);
    }

    /**
     * Convenience wrapper for the data source object's getOffset() method.
     *
     * @return mixed Whatever the data source reports (presumably the current
     *   integer byte offset - confirm against the data source class).
     */
    public function getOffset()
    {
        return $this->_dataSource->getOffset();
    }

    /**
     * Convenience wrapper for the data source object's getSize() method.
     *
     * @return mixed Whatever the data source reports (presumably the total
     *   size in bytes - confirm against the data source class).
     */
    public function getSize()
    {
        return $this->_dataSource->getSize();
    }

    /**
     * Convenience wrapper for the data source object's readBytes() method.
     *
     * @param integer $byteCount Number of bytes to read.
     * @return string
     * @throws Zend_Pdf_Exception
     */
    public function readBytes($byteCount)
    {
        return $this->_dataSource->readBytes($byteCount);
    }

    /**
     * Convenience wrapper for the data source object's skipBytes() method.
     *
     * @param integer $byteCount Number of bytes to skip.
     * @throws Zend_Pdf_Exception
     */
    public function skipBytes($byteCount)
    {
        $this->_dataSource->skipBytes($byteCount);
    }


  /* Parser Methods */

    /**
     * Reads the signed integer value from the binary file at the current byte
     * offset.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. Both arguments are validated before any bytes are
     * consumed, so an invalid size or byte order does not advance the offset.
     *
     * @param integer $size Size of integer in bytes: 1-4
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return integer
     * @throws Zend_Pdf_Exception
     */
    public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
    {
        if (($size < 1) || ($size > 4)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
        }
        $this->_assertValidByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($size);

        /* unpack() will not work for this method because it always works in
         * the host byte order for signed integers. It also does not allow for
         * variable integer sizes.
         */
        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
            $number = ord($bytes[0]);
            if (($number & 0x80) == 0x80) {
                /* This number is negative. Extract the positive equivalent
                 * by complementing every byte as it is accumulated.
                 */
                $number = (~ $number) & 0xff;
                for ($i = 1; $i < $size; $i++) {
                    $number = ($number << 8) | ((~ ord($bytes[$i])) & 0xff);
                }
                /* Now turn this back into a negative number by taking the
                 * two's complement (we didn't add one above so won't
                 * subtract it below). This works reliably on both 32- and
                 * 64-bit systems.
                 */
                $number = ~$number;
            } else {
                for ($i = 1; $i < $size; $i++) {
                    $number = ($number << 8) | ord($bytes[$i]);
                }
            }
        } else {
            /* Little-endian: the sign bit lives in the LAST byte read. The
             * same complement trick as above is used for negative values.
             */
            $isNegative = ((ord($bytes[$size - 1]) & 0x80) == 0x80);
            $number = 0;
            for ($i = $size - 1; $i >= 0; $i--) {
                $byte = ord($bytes[$i]);
                if ($isNegative) {
                    $byte = (~ $byte) & 0xff;
                }
                $number |= $byte << ($i * 8);
            }
            if ($isNegative) {
                $number = ~$number;
            }
        }
        return $number;
    }

    /**
     * Reads the unsigned integer value from the binary file at the current byte
     * offset.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. Both arguments are validated before any bytes are
     * consumed, so an invalid size or byte order does not advance the offset.
     *
     * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
     * resulting value WILL BE SIGNED because PHP uses signed integers internally
     * for everything. To guarantee portability, be sure to use bitwise
     * operators on large unsigned integers!
     *
     * @param integer $size Size of integer in bytes: 1-4
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return integer
     * @throws Zend_Pdf_Exception
     */
    public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
    {
        if (($size < 1) || ($size > 4)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
                                         Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
        }
        $this->_assertValidByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($size);

        /* unpack() is a bit heavyweight for this simple conversion. Just
         * work the bytes directly.
         */
        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
            $number = ord($bytes[0]);
            for ($i = 1; $i < $size; $i++) {
                $number = ($number << 8) | ord($bytes[$i]);
            }
        } else {
            /* Little-endian: byte $i contributes bits ($i * 8) and up. */
            $number = 0;
            for ($i = $size - 1; $i >= 0; $i--) {
                $number |= ord($bytes[$i]) << ($i * 8);
            }
        }
        return $number;
    }

    /**
     * Returns true if the specified bit is set in the integer bitfield.
     *
     * @param integer $bit Bit number to test (i.e. 0-31)
     * @param integer $bitField
     * @return boolean
     */
    public function isBitSet($bit, $bitField)
    {
        $bitMask = 1 << $bit;
        $isSet = (($bitField & $bitMask) == $bitMask);
        return $isSet;
    }

    /**
     * Reads the signed fixed-point number from the binary file at the current
     * byte offset.
     *
     * Common fixed-point sizes are 2.14 and 16.16.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs.
     *
     * The value is obtained by reading a signed integer of
     * ($mantissaBits + $fractionBits) / 8 bytes via {@link readInt()} and
     * dividing it by 2^$fractionBits. Note that PHP's division operator yields
     * an integer rather than a float when the result is a whole number.
     *
     * @param integer $mantissaBits Number of bits in the mantissa
     * @param integer $fractionBits Number of bits in the fraction
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @return float
     * @throws Zend_Pdf_Exception
     */
    public function readFixed($mantissaBits, $fractionBits,
                              $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
    {
        $bitsToRead = $mantissaBits + $fractionBits;
        if (($bitsToRead % 8) !== 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
                                         Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
        }
        $number = $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
        return $number;
    }

    /**
     * Reads the Unicode UTF-16-encoded string from the binary file at the
     * current byte offset.
     *
     * The byte order of the UTF-16 string must be specified. You must also
     * supply the desired resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. A $byteCount of zero returns an empty string without
     * touching the data source. The byte order is validated before any bytes
     * are consumed.
     *
     * @todo Consider changing $byteCount to a character count. They are not
     *   always equivalent (in the case of surrogates).
     * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
     *   string being extracted.
     *
     * @param integer $byteCount Number of bytes (characters * 2) to return.
     * @param integer $byteOrder (optional) Big- or little-endian byte order.
     *   Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
     *   If omitted, uses big-endian.
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @return string The converted string; iconv() itself returns false when
     *   the conversion fails, and that value is passed through unchanged.
     * @throws Zend_Pdf_Exception
     */
    public function readStringUTF16($byteCount,
                                    $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
                                    $characterSet = '')
    {
        if ($byteCount == 0) {
            return '';
        }
        $this->_assertValidByteOrder($byteOrder);

        $bytes = $this->_dataSource->readBytes($byteCount);

        if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
            $sourceCharacterSet = 'UTF-16BE';
        } else {
            $sourceCharacterSet = 'UTF-16LE';
        }
        /* Skip the conversion entirely when the caller wants the raw
         * encoding that is already in the file.
         */
        if ($characterSet == $sourceCharacterSet) {
            return $bytes;
        }
        return iconv($sourceCharacterSet, $characterSet, $bytes);
    }

    /**
     * Reads the Mac Roman-encoded string from the binary file at the current
     * byte offset.
     *
     * You must supply the desired resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs. A $byteCount of zero returns an empty string without
     * touching the data source.
     *
     * @param integer $byteCount Number of bytes (characters) to return.
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @return string The converted string; iconv() itself returns false when
     *   the conversion fails, and that value is passed through unchanged.
     * @throws Zend_Pdf_Exception
     */
    public function readStringMacRoman($byteCount, $characterSet = '')
    {
        if ($byteCount == 0) {
            return '';
        }
        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == 'MacRoman') {
            return $bytes;
        }
        return iconv('MacRoman', $characterSet, $bytes);
    }

    /**
     * Reads the Pascal string from the binary file at the current byte offset.
     *
     * The length of the Pascal string is determined by reading the length bytes
     * which precede the character data. The length is read with
     * {@link readUInt()} using its default (big-endian) byte order, and the
     * character data is treated as ASCII. You must supply the desired
     * resulting character set.
     *
     * Advances the offset by the number of bytes read. Throws an exception if
     * an error occurs.
     *
     * @param string $characterSet (optional) Desired resulting character set.
     *   You may use any character set supported by {@link iconv()}. If omitted,
     *   uses 'current locale'.
     * @param integer $lengthBytes (optional) Number of bytes that make up the
     *   length. Default is 1.
     * @return string The converted string; iconv() itself returns false when
     *   the conversion fails, and that value is passed through unchanged.
     * @throws Zend_Pdf_Exception
     */
    public function readStringPascal($characterSet = '', $lengthBytes = 1)
    {
        $byteCount = $this->readUInt($lengthBytes);
        if ($byteCount == 0) {
            return '';
        }
        $bytes = $this->_dataSource->readBytes($byteCount);
        if ($characterSet == 'ASCII') {
            return $bytes;
        }
        return iconv('ASCII', $characterSet, $bytes);
    }



  /**** Internal Methods ****/


    /**
     * Throws an exception unless $byteOrder matches one of the BYTE_ORDER_
     * constants.
     *
     * Uses the same loose (==) comparison the parser methods use when they
     * branch on the byte order, so exactly the same set of values is accepted.
     * Called before any bytes are read so that a bad argument never moves the
     * data source's offset.
     *
     * @param integer $byteOrder
     * @throws Zend_Pdf_Exception
     */
    private function _assertValidByteOrder($byteOrder)
    {
        if (($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) ||
            ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN)) {
            return;
        }
        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
                                     Zend_Pdf_Exception::INVALID_BYTE_ORDER);
    }

}
