diff options
Diffstat (limited to 'applications/core/lib/Zend/Pdf/StringParser.php')
| -rw-r--r-- | applications/core/lib/Zend/Pdf/StringParser.php | 709 |
1 files changed, 709 insertions, 0 deletions
diff --git a/applications/core/lib/Zend/Pdf/StringParser.php b/applications/core/lib/Zend/Pdf/StringParser.php new file mode 100644 index 0000000..9f8f939 --- /dev/null +++ b/applications/core/lib/Zend/Pdf/StringParser.php @@ -0,0 +1,709 @@ +<?php +/** + * Zend Framework + * + * LICENSE + * + * This source file is subject to the new BSD license that is bundled + * with this package in the file LICENSE.txt. + * It is also available through the world-wide-web at this URL: + * http://framework.zend.com/license/new-bsd + * If you did not receive a copy of the license and are unable to + * obtain it through the world-wide-web, please send an email + * to [email protected] so we can send you a copy immediately. + * + * @package Zend_Pdf + * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) + * @license http://framework.zend.com/license/new-bsd New BSD License + */ + + +/** Zend_Pdf_Element */ +require_once 'Zend/Pdf/Element.php'; + +/** Zend_Pdf_Element_Array */ +require_once 'Zend/Pdf/Element/Array.php'; + +/** Zend_Pdf_Element_String_Binary */ +require_once 'Zend/Pdf/Element/String/Binary.php'; + +/** Zend_Pdf_Element_Boolean */ +require_once 'Zend/Pdf/Element/Boolean.php'; + +/** Zend_Pdf_Element_Dictionary */ +require_once 'Zend/Pdf/Element/Dictionary.php'; + +/** Zend_Pdf_Element_Name */ +require_once 'Zend/Pdf/Element/Name.php'; + +/** Zend_Pdf_Element_Numeric */ +require_once 'Zend/Pdf/Element/Numeric.php'; + +/** Zend_Pdf_Element_Object */ +require_once 'Zend/Pdf/Element/Object.php'; + +/** Zend_Pdf_Element_Reference */ +require_once 'Zend/Pdf/Element/Reference.php'; + +/** Zend_Pdf_Element_Object_Stream */ +require_once 'Zend/Pdf/Element/Object/Stream.php'; + +/** Zend_Pdf_Element_String */ +require_once 'Zend/Pdf/Element/String.php'; + +/** Zend_Pdf_Element_Null */ +require_once 'Zend/Pdf/Element/Null.php'; + +/** Zend_Pdf_Element_Reference_Context */ +require_once 'Zend/Pdf/Element/Reference/Context.php'; + +/** Zend_Pdf_Element_Reference_Table */ +require_once 'Zend/Pdf/Element/Reference/Table.php'; + +/** Zend_Pdf_ElementFactory_Interface */ +require_once 'Zend/Pdf/ElementFactory/Interface.php'; + +/** Zend_Pdf_PhpArray */ +require_once 'Zend/Pdf/PhpArray.php'; + + +/** + * PDF string parser + * + * @package Zend_Pdf + * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com) + * @license http://framework.zend.com/license/new-bsd New BSD License + */ +class Zend_Pdf_StringParser +{ + /** + * Source PDF + * + * @var string + */ + public $data = ''; + + /** + * Current position in a data + * + * @var integer + */ + public $offset = 0; + + /** + * Current reference context + * + * @var Zend_Pdf_Element_Reference_Context + */ + private $_context = null; + + /** + * Array of elements of the currently parsed object/trailer + * + * @var array + */ + private $_elements = array(); + + /** + * PDF objects factory. + * + * @var Zend_Pdf_ElementFactory_Interface + */ + private $_objFactory = null; + + + /** + * Clean up resources. + * + * Clear current state to remove cyclic object references + */ + public function cleanUp() + { + $this->_context = null; + $this->_elements = array(); + $this->_objFactory = null; + } + + /** + * Character with code $chCode is white space + * + * @param integer $chCode + * @return boolean + */ + public static function isWhiteSpace($chCode) + { + if ($chCode == 0x00 || // null character + $chCode == 0x09 || // Tab + $chCode == 0x0A || // Line feed + $chCode == 0x0C || // Form Feed + $chCode == 0x0D || // Carriage return + $chCode == 0x20 // Space + ) { + return true; + } else { + return false; + } + } + + + /** + * Character with code $chCode is a delimiter character + * + * @param integer $chCode + * @return boolean + */ + public static function isDelimiter($chCode ) + { + if ($chCode == 0x28 || // '(' + $chCode == 0x29 || // ')' + $chCode == 0x3C || // '<' + $chCode == 0x3E || // '>' + $chCode == 0x5B || // '[' + $chCode == 0x5D || // ']' + $chCode == 0x7B || // '{' + $chCode == 0x7D || // '}' + $chCode == 0x2F || // '/' + $chCode == 0x25 // '%' + ) { + return true; + } else { + return false; + } + } + + + /** + * Skip white space + * + * @param boolean $skipComment + */ + public function skipWhiteSpace($skipComment = true) + { + while ($this->offset < strlen($this->data)) { + if (self::isWhiteSpace( ord($this->data[$this->offset]) )) { + $this->offset++; + } else if (ord($this->data[$this->offset]) == 0x25 && $skipComment) { // '%' + $this->skipComment(); + } else { + return; + } + } + } + + + /** + * Skip comment + */ + public function skipComment() + { + while ($this->offset < strlen($this->data)) + { + if (ord($this->data[$this->offset]) != 0x0A || // Line feed + ord($this->data[$this->offset]) != 0x0d // Carriage return + ) { + $this->offset++; + } else { + return; + } + } + } + + + /** + * Read comment line + * + * @return string + */ + public function readComment() + { + $this->skipWhiteSpace(false); + + /** Check if it's a comment line */ + if ($this->data[$this->offset] != '%') { + return ''; + } + + for ($start = $this->offset; + $this->offset < strlen($this->data); + $this->offset++) { + if (ord($this->data[$this->offset]) == 0x0A || // Line feed + ord($this->data[$this->offset]) == 0x0d // Carriage return + ) { + break; + } + } + + return substr($this->data, $start, $this->offset-$start); + } + + + /** + * Returns next lexeme from a pdf stream + * + * @return string + */ + public function readLexeme() + { + $this->skipWhiteSpace(); + + if ($this->offset >= strlen($this->data)) { + return ''; + } + + $start = $this->offset; + + if (self::isDelimiter( ord($this->data[$start]) )) { + if ($this->data[$start] == '<' && $this->offset + 1 < strlen($this->data) && $this->data[$start+1] == '<') { + $this->offset += 2; + return '<<'; + } else if ($this->data[$start] == '>' && $this->offset + 1 < strlen($this->data) && $this->data[$start+1] == '>') { + $this->offset += 2; + return '>>'; + } else { + $this->offset++; + return $this->data[$start]; + } + } else { + while ( ($this->offset < strlen($this->data)) && + (!self::isDelimiter( ord($this->data[$this->offset]) )) && + (!self::isWhiteSpace( ord($this->data[$this->offset]) )) ) { + $this->offset++; + } + + return substr($this->data, $start, $this->offset - $start); + } + } + + + /** + * Read elemental object from a PDF stream + * + * @return Zend_Pdf_Element + * @throws Zend_Pdf_Exception + */ + public function readElement($nextLexeme = null) + { + if ($nextLexeme === null) { + $nextLexeme = $this->readLexeme(); + } + + /** + * Note: readElement() method is a public method and could be invoked from other classes. + * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care + * about _elements member management. + */ + switch ($nextLexeme) { + case '(': + return ($this->_elements[] = $this->_readString()); + + case '<': + return ($this->_elements[] = $this->_readBinaryString()); + + case '/': + return ($this->_elements[] = new Zend_Pdf_Element_Name( + Zend_Pdf_Element_Name::unescape( $this->readLexeme() ) + )); + + case '[': + return ($this->_elements[] = $this->_readArray()); + + case '<<': + return ($this->_elements[] = $this->_readDictionary()); + + case ')': + // fall through to next case + case '>': + // fall through to next case + case ']': + // fall through to next case + case '>>': + // fall through to next case + case '{': + // fall through to next case + case '}': + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.', + $this->offset)); + + default: + if (strcasecmp($nextLexeme, 'true') == 0) { + return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true)); + } else if (strcasecmp($nextLexeme, 'false') == 0) { + return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false)); + } else if (strcasecmp($nextLexeme, 'null') == 0) { + return ($this->_elements[] = new Zend_Pdf_Element_Null()); + } + + $ref = $this->_readReference($nextLexeme); + if ($ref !== null) { + return ($this->_elements[] = $ref); + } + + return ($this->_elements[] = $this->_readNumeric($nextLexeme)); + } + } + + + /** + * Read string PDF object + * Also reads trailing ')' from a pdf stream + * + * @return Zend_Pdf_Element_String + * @throws Zend_Pdf_Exception + */ + private function _readString() + { + $start = $this->offset; + $openedBrackets = 1; + + while ($this->offset < strlen($this->data)) { + switch (ord( $this->data[$this->offset] )) { + case 0x28: // '(' - opened bracket in the string, needs balanced pair. + $openedBrackets++; + break; + + case 0x29: // ')' - pair to the opened bracket + $openedBrackets--; + break; + + case 0x5C: // '\\' - escape sequence, skip next char from a check + $this->offset++; + } + + $this->offset++; + if ($openedBrackets == 0) { + break; // end of string + } + } + if ($openedBrackets != 0) { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start)); + } + + return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data, + $start, + $this->offset - $start - 1) )); + } + + + /** + * Read binary string PDF object + * Also reads trailing '>' from a pdf stream + * + * @return Zend_Pdf_Element_String_Binary + * @throws Zend_Pdf_Exception + */ + private function _readBinaryString() + { + $start = $this->offset; + + while ($this->offset < strlen($this->data)) { + if (self::isWhiteSpace( ord($this->data[$this->offset]) ) || + ctype_xdigit( $this->data[$this->offset] ) ) { + $this->offset++; + } else if ($this->data[$this->offset] == '>') { + $this->offset++; + return new Zend_Pdf_Element_String_Binary( + Zend_Pdf_Element_String_Binary::unescape( substr($this->data, + $start, + $this->offset - $start - 1) )); + } else { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset)); + } + } + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while binary string reading. Offset - 0x%X. \'>\' expected.', $start)); + } + + + /** + * Read array PDF object + * Also reads trailing ']' from a pdf stream + * + * @return Zend_Pdf_Element_Array + * @throws Zend_Pdf_Exception + */ + private function _readArray() + { + $elements = array(); + + while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) { + if ($nextLexeme != ']') { + $elements[] = $this->readElement($nextLexeme); + } else { + return new Zend_Pdf_Element_Array($elements); + } + } + + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset)); + } + + + /** + * Read dictionary PDF object + * Also reads trailing '>>' from a pdf stream + * + * @return Zend_Pdf_Element_Dictionary + * @throws Zend_Pdf_Exception + */ + private function _readDictionary() + { + $dictionary = new Zend_Pdf_Element_Dictionary(); + + while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) { + if ($nextLexeme != '>>') { + $nameStart = $this->offset - strlen($nextLexeme); + + $name = $this->readElement($nextLexeme); + $value = $this->readElement(); + + if (!$name instanceof Zend_Pdf_Element_Name) { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart)); + } + + $dictionary->add($name, $value); + } else { + return $dictionary; + } + } + + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset)); + } + + + /** + * Read reference PDF object + * + * @param string $nextLexeme + * @return Zend_Pdf_Element_Reference + */ + private function _readReference($nextLexeme = null) + { + $start = $this->offset; + + if ($nextLexeme === null) { + $objNum = $this->readLexeme(); + } else { + $objNum = $nextLexeme; + } + if (!ctype_digit($objNum)) { // it's not a reference + $this->offset = $start; + return null; + } + + $genNum = $this->readLexeme(); + if (!ctype_digit($genNum)) { // it's not a reference + $this->offset = $start; + return null; + } + + $rMark = $this->readLexeme(); + if ($rMark != 'R') { // it's not a reference + $this->offset = $start; + return null; + } + + $ref = new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve()); + + return $ref; + } + + + /** + * Read numeric PDF object + * + * @param string $nextLexeme + * @return Zend_Pdf_Element_Numeric + */ + private function _readNumeric($nextLexeme = null) + { + if ($nextLexeme === null) { + $nextLexeme = $this->readLexeme(); + } + + return new Zend_Pdf_Element_Numeric($nextLexeme); + } + + + /** + * Read inderect object from a PDF stream + * + * @param integer $offset + * @param Zend_Pdf_Element_Reference_Context $context + * @return Zend_Pdf_Element_Object + */ + public function getObject($offset, Zend_Pdf_Element_Reference_Context $context) + { + if ($offset === null ) { + return new Zend_Pdf_Element_Null(); + } + + // Save current offset to make getObject() reentrant + $offsetSave = $this->offset; + + $this->offset = $offset; + $this->_context = $context; + $this->_elements = array(); + + $objNum = $this->readLexeme(); + if (!ctype_digit($objNum)) { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum))); + } + + $genNum = $this->readLexeme(); + if (!ctype_digit($genNum)) { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum))); + } + + $objKeyword = $this->readLexeme(); + if ($objKeyword != 'obj') { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword))); + } + + $objValue = $this->readElement(); + + $nextLexeme = $this->readLexeme(); + + if( $nextLexeme == 'endobj' ) { + /** + * Object is not generated by factory (thus it's not marked as modified object). + * But factory is assigned to the obect. + */ + $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve()); + + foreach ($this->_elements as $element) { + $element->setParentObject($obj); + } + + // Restore offset value + $this->offset = $offsetSave; + + return $obj; + } + + /** + * It's a stream object + */ + if ($nextLexeme != 'stream') { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme))); + } + + if (!$objValue instanceof Zend_Pdf_Element_Dictionary) { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme))); + } + + /** + * References are automatically dereferenced at this moment. + */ + $streamLength = $objValue->Length->value; + + /** + * 'stream' keyword must be followed by either cr-lf sequence or lf character only. + * This restriction gives the possibility to recognize all cases exactly + */ + if ($this->data[$this->offset] == "\r" && + $this->data[$this->offset + 1] == "\n" ) { + $this->offset += 2; + } else if ($this->data[$this->offset] == "\n" ) { + $this->offset++; + } else { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme))); + } + + $dataOffset = $this->offset; + + $this->offset += $streamLength; + + $nextLexeme = $this->readLexeme(); + if ($nextLexeme != 'endstream') { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme))); + } + + $nextLexeme = $this->readLexeme(); + if ($nextLexeme != 'endobj') { + throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme))); + } + + $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data, + $dataOffset, + $streamLength), + (int)$objNum, + (int)$genNum, + $this->_objFactory->resolve(), + $objValue); + + foreach ($this->_elements as $element) { + $element->setParentObject($obj); + } + + // Restore offset value + $this->offset = $offsetSave; + + return $obj; + } + + + /** + * Get length of source string + * + * @return integer + */ + public function getLength() + { + return strlen($this->data); + } + + /** + * Get source string + * + * @return string + */ + public function getString() + { + return $this->data; + } + + + /** + * Parse integer value from a binary stream + * + * @param string $stream + * @param integer $offset + * @param integer $size + * @return integer + */ + public static function parseIntFromStream($stream, $offset, $size) + { + $value = 0; + for ($count = 0; $count < $size; $count++) { + $value *= 256; + $value += ord($stream[$offset + $count]); + } + + return $value; + } + + + + /** + * Set current context + * + * @param Zend_Pdf_Element_Reference_Context $context + */ + public function setContext(Zend_Pdf_Element_Reference_Context $context) + { + $this->_context = $context; + } + + /** + * Object constructor + * + * Note: PHP duplicates string, which is sent by value, only of it's updated. + * Thus we don't need to care about overhead + * + * @param string $pdfString + * @param Zend_Pdf_ElementFactory_Interface $factory + */ + public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory) + { + $this->data = $source; + $this->_objFactory = $factory; + } +} |
