Xml.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11. * @link https://cakephp.org CakePHP(tm) Project
  12. * @since 0.10.3
  13. * @license https://opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use Cake\Utility\Exception\XmlException;
  17. use DOMDocument;
  18. use DOMNode;
  19. use DOMText;
  20. use Exception;
  21. use SimpleXMLElement;
  22. /**
  23. * XML handling for CakePHP.
  24. *
  25. * The methods in these classes enable the datasources that use XML to work.
  26. */
  27. class Xml
  28. {
  /**
   * Initialize SimpleXMLElement or DOMDocument from a given XML string, file path or array.
   *
   * Input is dispatched as follows:
   *
   * - arrays and objects are handed to Xml::fromArray();
   * - strings containing a `<` are parsed as XML;
   * - any other string is treated as a file path and is read only when the
   *   `readFile` option is enabled and `file_exists()` reports the path.
   *
   * ### Usage:
   *
   * Building XML from a string:
   *
   * ```
   * $xml = Xml::build('<example>text</example>');
   * ```
   *
   * Building XML from string (output DOMDocument):
   *
   * ```
   * $xml = Xml::build('<example>text</example>', ['return' => 'domdocument']);
   * ```
   *
   * Building XML from a file path:
   *
   * ```
   * $xml = Xml::build('/path/to/an/xml/file.xml');
   * ```
   *
   * Building XML from a remote URL (fetch the document first, then parse the body):
   *
   * ```
   * use Cake\Http\Client;
   *
   * $http = new Client();
   * $response = $http->get('http://example.com/example.xml');
   * $xml = Xml::build($response->body());
   * ```
   *
   * Building from an array:
   *
   * ```
   * $value = [
   *     'tags' => [
   *         'tag' => [
   *             [
   *                 'id' => '1',
   *                 'name' => 'defect'
   *             ],
   *             [
   *                 'id' => '2',
   *                 'name' => 'enhancement'
   *             ]
   *         ]
   *     ]
   * ];
   * $xml = Xml::build($value);
   * ```
   *
   * When building XML from an array ensure that there is only one top level element.
   *
   * ### Options
   *
   * - `return` Can be 'simplexml' to return object of SimpleXMLElement or 'domdocument' to return DOMDocument.
   * - `loadEntities` Defaults to false. Set to true to enable loading of `<!ENTITY` definitions. This
   *   is disabled by default for security reasons.
   * - `readFile` Set to false to disable file reading. This is important to disable when
   *   putting user data into Xml::build(). If enabled local files will be read if they exist.
   *   Defaults to true for backwards compatibility reasons.
   * - `parseHuge` Enable the `LIBXML_PARSEHUGE` flag.
   *
   * If using array as input, you can pass `options` from Xml::fromArray.
   *
   * @param string|array $input XML string, a path to a file, or an array (objects are forwarded to Xml::fromArray())
   * @param array $options The options to use
   * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
   * @throws \Cake\Utility\Exception\XmlException When the input is not a string, array or object,
   *   or when no XML can be read from it.
   */
  public static function build($input, array $options = [])
  {
      $options += [
          'return' => 'simplexml',
          'loadEntities' => false,
          'readFile' => true,
          'parseHuge' => false,
      ];

      if (is_array($input) || is_object($input)) {
          return static::fromArray($input, $options);
      }

      // Validate the type before any string function touches the value, so that
      // null, numbers, booleans and resources fail with a clear XmlException.
      if (!is_string($input)) {
          throw new XmlException('Invalid input.');
      }

      if (strpos($input, '<') !== false) {
          return static::_loadXml($input, $options);
      }

      if ($options['readFile'] && file_exists($input)) {
          $contents = file_get_contents($input);
          // file_get_contents() returns false for unreadable paths (e.g. directories);
          // never hand that to the parser.
          if ($contents === false) {
              throw new XmlException('XML cannot be read.');
          }

          return static::_loadXml($contents, $options);
      }

      throw new XmlException('XML cannot be read.');
  }
  /**
   * Parse the input data and create either a SimpleXmlElement object or a DOMDocument.
   *
   * libxml's internal error handling is switched on for the duration of the parse and
   * restored afterwards. On libxml older than 2.9.0 the external entity loader is
   * disabled while parsing (unless `loadEntities` is set) and then put back to the
   * state it was in before the call.
   *
   * @param string $input The input to load.
   * @param array $options The options to use. See Xml::build()
   * @return \SimpleXMLElement|\DOMDocument
   * @throws \Cake\Utility\Exception\XmlException When the input cannot be parsed.
   */
  protected static function _loadXml($input, $options)
  {
      $flags = 0;
      if (!empty($options['parseHuge'])) {
          $flags |= LIBXML_PARSEHUGE;
      }

      $internalErrors = libxml_use_internal_errors(true);

      // libxml >= 2.9.0 does not substitute external entities unless asked to, and
      // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only toggle
      // the loader on older libxml builds.
      $toggleEntityLoader = !$options['loadEntities']
          && LIBXML_VERSION < 20900
          && function_exists('libxml_disable_entity_loader');
      $previousEntityLoader = false;
      if ($toggleEntityLoader) {
          // Remember the prior state so it can be restored rather than forced to "enabled".
          $previousEntityLoader = libxml_disable_entity_loader(true);
      }

      try {
          if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
              return new SimpleXMLElement($input, $flags | LIBXML_NOCDATA);
          }

          $xml = new DOMDocument();
          // With internal errors enabled loadXML() reports failure only through its
          // return value; turn that into an exception like the SimpleXML path does.
          if ($xml->loadXML($input, $flags) === false) {
              $error = libxml_get_last_error();
              $reason = $error ? trim($error->message) : 'Document could not be parsed as XML';
              throw new Exception($reason);
          }

          return $xml;
      } catch (Exception $e) {
          throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
      } finally {
          if ($toggleEntityLoader) {
              libxml_disable_entity_loader($previousEntityLoader);
          }
          libxml_use_internal_errors($internalErrors);
      }
  }
  /**
   * Parse the input html string and create either a SimpleXmlElement object or a DOMDocument.
   *
   * The markup is always parsed with DOMDocument::loadHTML(); when a SimpleXMLElement
   * is requested the resulting document is converted with simplexml_import_dom().
   *
   * ### Options
   *
   * - `return` 'simplexml' (default) or 'simplexmlelement' for a SimpleXMLElement;
   *   any other value returns the DOMDocument.
   * - `loadEntities` Defaults to false. See Xml::build().
   * - `parseHuge` Enable the `LIBXML_PARSEHUGE` flag.
   *
   * @param string $input The input html string to load.
   * @param array $options The options to use. See Xml::build()
   * @return \SimpleXMLElement|\DOMDocument
   * @throws \Cake\Utility\Exception\XmlException When the markup cannot be parsed or converted.
   */
  public static function loadHtml($input, $options = [])
  {
      $options += [
          'return' => 'simplexml',
          'loadEntities' => false,
      ];

      $flags = 0;
      if (!empty($options['parseHuge'])) {
          $flags |= LIBXML_PARSEHUGE;
      }

      $internalErrors = libxml_use_internal_errors(true);

      // libxml >= 2.9.0 does not substitute external entities unless asked to, and
      // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only toggle
      // the loader on older libxml builds.
      $toggleEntityLoader = !$options['loadEntities']
          && LIBXML_VERSION < 20900
          && function_exists('libxml_disable_entity_loader');
      $previousEntityLoader = false;
      if ($toggleEntityLoader) {
          // Remember the prior state so it can be restored rather than forced to "enabled".
          $previousEntityLoader = libxml_disable_entity_loader(true);
      }

      try {
          $dom = new DOMDocument();
          if ($dom->loadHTML($input, $flags) === false) {
              throw new Exception('Document could not be parsed as HTML');
          }

          if ($options['return'] !== 'simplexml' && $options['return'] !== 'simplexmlelement') {
              return $dom;
          }

          $xml = simplexml_import_dom($dom);
          // simplexml_import_dom() signals failure through its return value, not an exception.
          if (!($xml instanceof SimpleXMLElement)) {
              throw new Exception('Document could not be converted to SimpleXMLElement');
          }

          return $xml;
      } catch (Exception $e) {
          throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
      } finally {
          if ($toggleEntityLoader) {
              libxml_disable_entity_loader($previousEntityLoader);
          }
          libxml_use_internal_errors($internalErrors);
      }
  }
  /**
   * Transform an array into a SimpleXMLElement or DOMDocument.
   *
   * ### Options
   *
   * - `format` Whether nested keys become child elements ('tags') or attributes ('attributes').
   * - `pretty` Returns formatted Xml when set to `true`. Defaults to `false`
   * - `version` Version of XML document. Default is 1.0.
   * - `encoding` Encoding of XML document. If null remove from XML header.
   *   Defaults to the value of `mb_internal_encoding()`.
   * - `return` Return a SimpleXMLElement ('simplexml') or a DOMDocument ('domdocument').
   *   Compared case-insensitively. Default is SimpleXMLElement.
   *
   * Using the following data:
   *
   * ```
   * $value = [
   *     'root' => [
   *         'tag' => [
   *             'id' => 1,
   *             'value' => 'defect',
   *             '@' => 'description'
   *         ]
   *     ]
   * ];
   * ```
   *
   * Calling `Xml::fromArray($value, 'tags');` Will generate:
   *
   * `<root><tag><id>1</id><value>defect</value>description</tag></root>`
   *
   * And calling `Xml::fromArray($value, 'attributes');` Will generate:
   *
   * `<root><tag id="1" value="defect">description</tag></root>`
   *
   * @param array|\Cake\Collection\Collection $input Array with data or a collection instance.
   *   Any object exposing a callable `toArray()` method is converted first.
   * @param string|array $options The options to use or a string to use as format.
   * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
   * @throws \Cake\Utility\Exception\XmlException When the input is not an array with exactly
   *   one top level element, or when that element has an integer key.
   */
  public static function fromArray($input, $options = [])
  {
      if (is_object($input) && method_exists($input, 'toArray') && is_callable([$input, 'toArray'])) {
          $input = call_user_func([$input, 'toArray']);
      }
      if (!is_array($input) || count($input) !== 1) {
          throw new XmlException('Invalid input.');
      }

      // key() reads the array's internal pointer, which the caller may have advanced;
      // rewind so the single top level key is always the one inspected.
      reset($input);
      if (is_int(key($input))) {
          throw new XmlException('The key of input must be alphanumeric');
      }

      if (!is_array($options)) {
          $options = ['format' => (string)$options];
      }
      $options += [
          'format' => 'tags',
          'version' => '1.0',
          'encoding' => mb_internal_encoding(),
          'return' => 'simplexml',
          'pretty' => false,
      ];

      $dom = new DOMDocument($options['version'], $options['encoding']);
      if ($options['pretty']) {
          $dom->formatOutput = true;
      }

      // Late static binding, like every other internal call in this class, so that
      // subclasses overriding _fromArray() are honoured.
      static::_fromArray($dom, $dom, $input, $options['format']);

      $options['return'] = strtolower($options['return']);
      if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
          return new SimpleXMLElement($dom->saveXML());
      }

      return $dom;
  }
  /**
   * Recursively append the entries of an array to a DOM node.
   *
   * Every key must be a string. Array values (or objects convertible through a
   * callable `toArray()`) become child elements via _createChild(); an array whose
   * concatenated keys form a numeric string is treated as a list and yields one
   * child per item. Non-array values become a namespace declaration (keys containing
   * `xmlns:`), an attribute (keys starting with `@`, or any key when $format is not
   * 'tags') or a child element holding the value as text.
   *
   * @param \DOMDocument $dom Handler to DOMDocument
   * @param \DOMElement $node Handler to DOMElement (child)
   * @param array $data Array of data to append to the $node.
   * @param string $format Either 'attributes' or 'tags'. This determines where nested keys go.
   * @return void
   * @throws \Cake\Utility\Exception\XmlException When a key is not a string, or an `@` key holds an array.
   */
  protected static function _fromArray($dom, $node, &$data, $format)
  {
      if (empty($data) || !is_array($data)) {
          return;
      }

      foreach ($data as $name => $item) {
          if (!is_string($name)) {
              throw new XmlException('Invalid array');
          }

          if (is_object($item) && method_exists($item, 'toArray') && is_callable([$item, 'toArray'])) {
              $item = call_user_func([$item, 'toArray']);
          }

          if (is_array($item)) {
              // '@'-prefixed keys denote attributes/text and cannot carry nested data.
              if ($name[0] === '@') {
                  throw new XmlException('Invalid array');
              }

              if (is_numeric(implode('', array_keys($item)))) {
                  // List: one sibling element per entry, all sharing the same tag name.
                  foreach ($item as $entry) {
                      static::_createChild([
                          'dom' => $dom,
                          'node' => $node,
                          'key' => $name,
                          'format' => $format,
                          'value' => $entry,
                      ]);
                  }
              } else {
                  // Struct: a single element built from the nested array.
                  static::_createChild([
                      'dom' => $dom,
                      'node' => $node,
                      'key' => $name,
                      'value' => $item,
                      'format' => $format,
                  ]);
              }
              continue;
          }

          // Normalise values that have no useful string form of their own.
          if (is_bool($item)) {
              $item = (int)$item;
          } elseif ($item === null) {
              $item = '';
          }

          if (strpos($name, 'xmlns:') !== false) {
              $node->setAttributeNS('http://www.w3.org/2000/xmlns/', $name, $item);
              continue;
          }

          if ($name[0] === '@' || $format !== 'tags') {
              $attributeName = $name[0] === '@' ? substr($name, 1) : $name;
              $attribute = $dom->createAttribute($attributeName);
              $attribute->appendChild($dom->createTextNode($item));
              $node->appendChild($attribute);
              continue;
          }

          if (is_numeric($item)) {
              $element = $dom->createElement($name, $item);
          } else {
              // Escape special characters by going through a text node
              // https://www.w3.org/TR/REC-xml/#syntax
              // https://bugs.php.net/bug.php?id=36795
              $element = $dom->createElement($name, '');
              $element->appendChild(new DOMText($item));
          }
          $node->appendChild($element);
      }
  }
  /**
   * Helper to _fromArray(). Creates one child element and appends it to the parent node.
   *
   * Recognised keys of $data are `dom` (the DOMDocument used as factory), `node`
   * (the parent to append to), `key` (the element name), `value` (the element
   * content) and `format` (forwarded to _fromArray()).
   *
   * When `value` is an array, its `@` entry becomes the element's text and its
   * `xmlns:` entry becomes an `xmlns` attribute; the remaining entries are added
   * through _fromArray(). Objects exposing a callable `toArray()` are converted first.
   * A non-array `value` becomes the element's text, except for null, false and the
   * empty string, which produce an empty element.
   *
   * @param array $data Array with information to create the child
   * @return void
   */
  protected static function _createChild($data)
  {
      $data += [
          'dom' => null,
          'node' => null,
          'key' => null,
          'value' => null,
          'format' => null,
      ];

      $value = $data['value'];
      $dom = $data['dom'];
      $key = $data['key'];
      $format = $data['format'];
      $node = $data['node'];

      $childNS = $childValue = null;
      if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
          $value = call_user_func([$value, 'toArray']);
      }
      if (is_array($value)) {
          if (isset($value['@'])) {
              $childValue = (string)$value['@'];
              unset($value['@']);
          }
          if (isset($value['xmlns:'])) {
              $childNS = $value['xmlns:'];
              unset($value['xmlns:']);
          }
      } elseif (!empty($value) || $value === 0 || $value === 0.0 || $value === '0') {
          // empty() treats every zero as "no value"; keep int, float and string zero
          // so numeric list items are not silently dropped.
          $childValue = (string)$value;
      }

      $child = $dom->createElement($key);
      if ($childValue !== null) {
          $child->appendChild($dom->createTextNode($childValue));
      }
      if ($childNS) {
          $child->setAttribute('xmlns', $childNS);
      }

      static::_fromArray($dom, $child, $value, $format);
      $node->appendChild($child);
  }
  /**
   * Convert an XML structure into a nested array.
   *
   * DOM nodes are first imported into SimpleXML. The conversion itself is done by
   * _toArray(), which receives the namespace prefixes found in the document plus
   * the empty prefix.
   *
   * @param \SimpleXMLElement|\DOMDocument|\DOMNode $obj SimpleXMLElement, DOMDocument or DOMNode instance
   * @return array Array representation of the XML structure.
   * @throws \Cake\Utility\Exception\XmlException When $obj is not (convertible to) a SimpleXMLElement.
   */
  public static function toArray($obj)
  {
      $element = $obj instanceof DOMNode ? simplexml_import_dom($obj) : $obj;

      if ($element instanceof SimpleXMLElement) {
          $prefixes = array_keys(array_merge(['' => ''], $element->getNamespaces(true)));
          $converted = [];
          static::_toArray($element, $converted, '', $prefixes);

          return $converted;
      }

      throw new XmlException('The input is not instance of SimpleXMLElement, DOMDocument or DOMNode.');
  }
  /**
   * Recursive worker for toArray(): converts one element and stores it in the parent array.
   *
   * Attributes are stored under `@name` keys (prefixed with `prefix:` when namespaced),
   * child elements are converted recursively, and the element's trimmed text is stored
   * under `@` when the element also has attributes or children. An element with neither
   * is represented by its trimmed text alone. When the parent already holds an entry of
   * the same name, the entries are collected into a list.
   *
   * @param \SimpleXMLElement $xml SimpleXMLElement object
   * @param array $parentData Parent array with data
   * @param string $ns Namespace of current child
   * @param array $namespaces List of namespaces in XML
   * @return void
   */
  protected static function _toArray($xml, &$parentData, $ns, $namespaces)
  {
      $converted = [];
      foreach ($namespaces as $prefix) {
          $qualifier = empty($prefix) ? '' : $prefix . ':';

          foreach ($xml->attributes($prefix, true) as $attributeName => $attributeValue) {
              $converted['@' . $qualifier . $attributeName] = (string)$attributeValue;
          }

          foreach ($xml->children($prefix, true) as $childElement) {
              static::_toArray($childElement, $converted, $prefix, $namespaces);
          }
      }

      $text = trim((string)$xml);
      if (empty($converted)) {
          // Leaf element: its value is just the text content.
          $converted = $text;
      } elseif ($text !== '') {
          $converted['@'] = $text;
      }

      $name = (empty($ns) ? '' : $ns . ':') . $xml->getName();

      if (!isset($parentData[$name])) {
          $parentData[$name] = $converted;

          return;
      }

      // A second element with the same name: promote the existing entry to a list first.
      $existing = $parentData[$name];
      if (!is_array($existing) || !isset($existing[0])) {
          $parentData[$name] = [$existing];
      }
      $parentData[$name][] = $converted;
  }
  453. }