summaryrefslogtreecommitdiff
path: root/applications/core/lib/Zend/Pdf/FileParser.php
diff options
Diffstat (limited to 'applications/core/lib/Zend/Pdf/FileParser.php')
-rw-r--r--applications/core/lib/Zend/Pdf/FileParser.php483
1 files changed, 483 insertions, 0 deletions
diff --git a/applications/core/lib/Zend/Pdf/FileParser.php b/applications/core/lib/Zend/Pdf/FileParser.php
new file mode 100644
index 0000000..b8f2358
--- /dev/null
+++ b/applications/core/lib/Zend/Pdf/FileParser.php
@@ -0,0 +1,483 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to [email protected] so we can send you a copy immediately.
+ *
+ * @package Zend_Pdf
+ * @subpackage FileParser
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+
+/**
+ * Abstract utility class for parsing binary files.
+ *
+ * Provides a library of methods to quickly navigate and extract various data
+ * types (signed and unsigned integers, floating- and fixed-point numbers,
+ * strings, etc.) from the file.
+ *
+ * File access is managed via a {@link Zend_Pdf_FileParserDataSource} object.
+ * This allows the same parser code to work with many different data sources:
+ * in-memory objects, filesystem files, etc.
+ *
+ * @package Zend_Pdf
+ * @subpackage FileParser
+ * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+abstract class Zend_Pdf_FileParser
+{
+ /**** Class Constants ****/
+
+ /**
+ * Little-endian byte order (0x04 0x03 0x02 0x01).
+ */
+ const BYTE_ORDER_LITTLE_ENDIAN = 0;
+
+ /**
+ * Big-endian byte order (0x01 0x02 0x03 0x04).
+ */
+ const BYTE_ORDER_BIG_ENDIAN = 1;
+
+
+
+ /**** Instance Variables ****/
+
+
+ /**
+ * Flag indicating that the file has passed a cursory validation check.
+ * @var boolean
+ */
+ protected $_isScreened = false;
+
+ /**
+ * Flag indicating that the file has been sucessfully parsed.
+ * @var boolean
+ */
+ protected $_isParsed = false;
+
+ /**
+ * Object representing the data source to be parsed.
+ * @var Zend_Pdf_FileParserDataSource
+ */
+ protected $_dataSource = null;
+
+
+
+ /**** Public Interface ****/
+
+
+ /* Abstract Methods */
+
+ /**
+ * Performs a cursory check to verify that the binary file is in the expected
+ * format. Intended to quickly weed out obviously bogus files.
+ *
+ * Must set $this->_isScreened to true if successful.
+ *
+ * @throws Zend_Pdf_Exception
+ */
+ abstract public function screen();
+
+ /**
+ * Reads and parses the complete binary file.
+ *
+ * Must set $this->_isParsed to true if successful.
+ *
+ * @throws Zend_Pdf_Exception
+ */
+ abstract public function parse();
+
+
+ /* Object Lifecycle */
+
+ /**
+ * Object constructor.
+ *
+ * Verifies that the data source has been properly initialized.
+ *
+ * @param Zend_Pdf_FileParserDataSource $dataSource
+ * @throws Zend_Pdf_Exception
+ */
+ public function __construct(Zend_Pdf_FileParserDataSource $dataSource)
+ {
+ if ($dataSource->getSize() == 0) {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception('The data source has not been properly initialized',
+ Zend_Pdf_Exception::BAD_DATA_SOURCE);
+ }
+ $this->_dataSource = $dataSource;
+ }
+
+ /**
+ * Object destructor.
+ *
+ * Discards the data source object.
+ */
+ public function __destruct()
+ {
+ $this->_dataSource = null;
+ }
+
+
+ /* Accessors */
+
+ /**
+ * Returns true if the file has passed a cursory validation check.
+ *
+ * @return boolean
+ */
+ public function isScreened()
+ {
+ return $this->_isScreened;
+ }
+
+ /**
+ * Returns true if the file has been successfully parsed.
+ *
+ * @return boolean
+ */
+ public function isParsed()
+ {
+ return $this->_isParsed;
+ }
+
+ /**
+ * Returns the data source object representing the file being parsed.
+ *
+ * @return Zend_Pdf_FileParserDataSource
+ */
+ public function getDataSource()
+ {
+ return $this->_dataSource;
+ }
+
+
+ /* Primitive Methods */
+
+ /**
+ * Convenience wrapper for the data source object's moveToOffset() method.
+ *
+ * @param integer $offset Destination byte offset.
+ * @throws Zend_Pdf_Exception
+ */
+ public function moveToOffset($offset)
+ {
+ $this->_dataSource->moveToOffset($offset);
+ }
+
+ public function getOffset() {
+ return $this->_dataSource->getOffset();
+ }
+
+ public function getSize() {
+ return $this->_dataSource->getSize();
+ }
+
+ /**
+ * Convenience wrapper for the data source object's readBytes() method.
+ *
+ * @param integer $byteCount Number of bytes to read.
+ * @return string
+ * @throws Zend_Pdf_Exception
+ */
+ public function readBytes($byteCount)
+ {
+ return $this->_dataSource->readBytes($byteCount);
+ }
+
+ /**
+ * Convenience wrapper for the data source object's skipBytes() method.
+ *
+ * @param integer $byteCount Number of bytes to skip.
+ * @throws Zend_Pdf_Exception
+ */
+ public function skipBytes($byteCount)
+ {
+ $this->_dataSource->skipBytes($byteCount);
+ }
+
+
+ /* Parser Methods */
+
+ /**
+ * Reads the signed integer value from the binary file at the current byte
+ * offset.
+ *
+ * Advances the offset by the number of bytes read. Throws an exception if
+ * an error occurs.
+ *
+ * @param integer $size Size of integer in bytes: 1-4
+ * @param integer $byteOrder (optional) Big- or little-endian byte order.
+ * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
+ * If omitted, uses big-endian.
+ * @return integer
+ * @throws Zend_Pdf_Exception
+ */
+ public function readInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
+ {
+ if (($size < 1) || ($size > 4)) {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception("Invalid signed integer size: $size",
+ Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
+ }
+ $bytes = $this->_dataSource->readBytes($size);
+ /* unpack() will not work for this method because it always works in
+ * the host byte order for signed integers. It also does not allow for
+ * variable integer sizes.
+ */
+ if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
+ $number = ord($bytes[0]);
+ if (($number & 0x80) == 0x80) {
+ /* This number is negative. Extract the positive equivalent.
+ */
+ $number = (~ $number) & 0xff;
+ for ($i = 1; $i < $size; $i++) {
+ $number = ($number << 8) | ((~ ord($bytes[$i])) & 0xff);
+ }
+ /* Now turn this back into a negative number by taking the
+ * two's complement (we didn't add one above so won't
+ * subtract it below). This works reliably on both 32- and
+ * 64-bit systems.
+ */
+ $number = ~$number;
+ } else {
+ for ($i = 1; $i < $size; $i++) {
+ $number = ($number << 8) | ord($bytes[$i]);
+ }
+ }
+ } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
+ $number = ord($bytes[$size - 1]);
+ if (($number & 0x80) == 0x80) {
+ /* Negative number. See discussion above.
+ */
+ $number = 0;
+ for ($i = --$size; $i >= 0; $i--) {
+ $number |= ((~ ord($bytes[$i])) & 0xff) << ($i * 8);
+ }
+ $number = ~$number;
+ } else {
+ $number = 0;
+ for ($i = --$size; $i >= 0; $i--) {
+ $number |= ord($bytes[$i]) << ($i * 8);
+ }
+ }
+ } else {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
+ Zend_Pdf_Exception::INVALID_BYTE_ORDER);
+ }
+ return $number;
+ }
+
+ /**
+ * Reads the unsigned integer value from the binary file at the current byte
+ * offset.
+ *
+ * Advances the offset by the number of bytes read. Throws an exception if
+ * an error occurs.
+ *
+ * NOTE: If you ask for a 4-byte unsigned integer on a 32-bit machine, the
+ * resulting value WILL BE SIGNED because PHP uses signed integers internally
+ * for everything. To guarantee portability, be sure to use bitwise operators
+ * operators on large unsigned integers!
+ *
+ * @param integer $size Size of integer in bytes: 1-4
+ * @param integer $byteOrder (optional) Big- or little-endian byte order.
+ * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
+ * If omitted, uses big-endian.
+ * @return integer
+ * @throws Zend_Pdf_Exception
+ */
+ public function readUInt($size, $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
+ {
+ if (($size < 1) || ($size > 4)) {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception("Invalid unsigned integer size: $size",
+ Zend_Pdf_Exception::INVALID_INTEGER_SIZE);
+ }
+ $bytes = $this->_dataSource->readBytes($size);
+ /* unpack() is a bit heavyweight for this simple conversion. Just
+ * work the bytes directly.
+ */
+ if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
+ $number = ord($bytes[0]);
+ for ($i = 1; $i < $size; $i++) {
+ $number = ($number << 8) | ord($bytes[$i]);
+ }
+ } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
+ $number = 0;
+ for ($i = --$size; $i >= 0; $i--) {
+ $number |= ord($bytes[$i]) << ($i * 8);
+ }
+ } else {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
+ Zend_Pdf_Exception::INVALID_BYTE_ORDER);
+ }
+ return $number;
+ }
+
+ /**
+ * Returns true if the specified bit is set in the integer bitfield.
+ *
+ * @param integer $bit Bit number to test (i.e. - 0-31)
+ * @param integer $bitField
+ * @return boolean
+ */
+ public function isBitSet($bit, $bitField)
+ {
+ $bitMask = 1 << $bit;
+ $isSet = (($bitField & $bitMask) == $bitMask);
+ return $isSet;
+ }
+
+ /**
+ * Reads the signed fixed-point number from the binary file at the current
+ * byte offset.
+ *
+ * Common fixed-point sizes are 2.14 and 16.16.
+ *
+ * Advances the offset by the number of bytes read. Throws an exception if
+ * an error occurs.
+ *
+ * @param integer $mantissaBits Number of bits in the mantissa
+ * @param integer $fractionBits Number of bits in the fraction
+ * @param integer $byteOrder (optional) Big- or little-endian byte order.
+ * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
+ * If omitted, uses big-endian.
+ * @return float
+ * @throws Zend_Pdf_Exception
+ */
+ public function readFixed($mantissaBits, $fractionBits,
+ $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN)
+ {
+ $bitsToRead = $mantissaBits + $fractionBits;
+ if (($bitsToRead % 8) !== 0) {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception('Fixed-point numbers are whole bytes',
+ Zend_Pdf_Exception::BAD_FIXED_POINT_SIZE);
+ }
+ $number = $this->readInt(($bitsToRead >> 3), $byteOrder) / (1 << $fractionBits);
+ return $number;
+ }
+
+ /**
+ * Reads the Unicode UTF-16-encoded string from the binary file at the
+ * current byte offset.
+ *
+ * The byte order of the UTF-16 string must be specified. You must also
+ * supply the desired resulting character set.
+ *
+ * Advances the offset by the number of bytes read. Throws an exception if
+ * an error occurs.
+ *
+ * @todo Consider changing $byteCount to a character count. They are not
+ * always equivalent (in the case of surrogates).
+ * @todo Make $byteOrder optional if there is a byte-order mark (BOM) in the
+ * string being extracted.
+ *
+ * @param integer $byteCount Number of bytes (characters * 2) to return.
+ * @param integer $byteOrder (optional) Big- or little-endian byte order.
+ * Use the BYTE_ORDER_ constants defined in {@link Zend_Pdf_FileParser}.
+ * If omitted, uses big-endian.
+ * @param string $characterSet (optional) Desired resulting character set.
+ * You may use any character set supported by {@link iconv()}. If omitted,
+ * uses 'current locale'.
+ * @return string
+ * @throws Zend_Pdf_Exception
+ */
+ public function readStringUTF16($byteCount,
+ $byteOrder = Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN,
+ $characterSet = '')
+ {
+ if ($byteCount == 0) {
+ return '';
+ }
+ $bytes = $this->_dataSource->readBytes($byteCount);
+ if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_BIG_ENDIAN) {
+ if ($characterSet == 'UTF-16BE') {
+ return $bytes;
+ }
+ return iconv('UTF-16BE', $characterSet, $bytes);
+ } else if ($byteOrder == Zend_Pdf_FileParser::BYTE_ORDER_LITTLE_ENDIAN) {
+ if ($characterSet == 'UTF-16LE') {
+ return $bytes;
+ }
+ return iconv('UTF-16LE', $characterSet, $bytes);
+ } else {
+ require_once 'Zend/Pdf/Exception.php';
+ throw new Zend_Pdf_Exception("Invalid byte order: $byteOrder",
+ Zend_Pdf_Exception::INVALID_BYTE_ORDER);
+ }
+ }
+
+ /**
+ * Reads the Mac Roman-encoded string from the binary file at the current
+ * byte offset.
+ *
+ * You must supply the desired resulting character set.
+ *
+ * Advances the offset by the number of bytes read. Throws an exception if
+ * an error occurs.
+ *
+ * @param integer $byteCount Number of bytes (characters) to return.
+ * @param string $characterSet (optional) Desired resulting character set.
+ * You may use any character set supported by {@link iconv()}. If omitted,
+ * uses 'current locale'.
+ * @return string
+ * @throws Zend_Pdf_Exception
+ */
+ public function readStringMacRoman($byteCount, $characterSet = '')
+ {
+ if ($byteCount == 0) {
+ return '';
+ }
+ $bytes = $this->_dataSource->readBytes($byteCount);
+ if ($characterSet == 'MacRoman') {
+ return $bytes;
+ }
+ return iconv('MacRoman', $characterSet, $bytes);
+ }
+
+ /**
+ * Reads the Pascal string from the binary file at the current byte offset.
+ *
+ * The length of the Pascal string is determined by reading the length bytes
+ * which preceed the character data. You must supply the desired resulting
+ * character set.
+ *
+ * Advances the offset by the number of bytes read. Throws an exception if
+ * an error occurs.
+ *
+ * @param string $characterSet (optional) Desired resulting character set.
+ * You may use any character set supported by {@link iconv()}. If omitted,
+ * uses 'current locale'.
+ * @param integer $lengthBytes (optional) Number of bytes that make up the
+ * length. Default is 1.
+ * @return string
+ * @throws Zend_Pdf_Exception
+ */
+ public function readStringPascal($characterSet = '', $lengthBytes = 1)
+ {
+ $byteCount = $this->readUInt($lengthBytes);
+ if ($byteCount == 0) {
+ return '';
+ }
+ $bytes = $this->_dataSource->readBytes($byteCount);
+ if ($characterSet == 'ASCII') {
+ return $bytes;
+ }
+ return iconv('ASCII', $characterSet, $bytes);
+ }
+
+}