free(); }

    /**
     * Drops everything this builder holds on to from the last parse so
     * that PHP's garbage collection can reclaim it once the caller
     * unset()s its own references.
     */
    private function free()
    {
        unset($this->page);
        $this->forms = array();
        $this->labels = array();
        // parse() hands this map to attachLabels(); clear it together with
        // the forms and labels so no widget references outlive the parse
        // or get offered to the labels of a later page.
        // NOTE(review): assumes widgets_by_id is array-valued like its
        // siblings - its declaration is outside this section; confirm.
        $this->widgets_by_id = array();
    }

    /**
     * This builder is only available if the 'tidy' extension is loaded.
     * @return boolean True if available.
     */
    public function can()
    {
        return extension_loaded('tidy');
    }

    /**
     * Reads the raw content of the page using HTML Tidy.
     * @param SimpleHttpResponse $response Fetched response.
     * @return SimplePage Newly parsed page.
     */
    public function parse($response)
    {
        $this->page = new SimplePage($response);
        // Guard the content HTML Tidy would otherwise strip before parsing.
        $guarded = $this->insertGuards($response->getContent());
        // NOTE(review): tidy_parse_string() is documented to return false
        // on failure; that case is not handled here and would fail on the
        // html() call below - confirm the intended behaviour.
        $tidied = tidy_parse_string(
            $guarded,
            array('output-xml' => false, 'wrap' => '0', 'indent' => 'no'),
            'latin1'
        );
        $this->walkTree($tidied->html());
        $this->attachLabels($this->widgets_by_id, $this->labels);
        $this->page->setForms($this->forms);
        // Keep a local handle on the page: free() unsets $this->page.
        $page = $this->page;
        $this->free();
        return $page;
    }

    /**
     * Stops HTMLTidy stripping content that we wish to preserve.
     * @param string $html The raw html.
     * @return string The html with guard tags inserted.
     */
    private function insertGuards($html)
    {
        // Textarea whitespace guards go in first, then the empty tag guards.
        $guarded = $this->insertTextareaSimpleWhitespaceGuards($html);
        return $this->insertEmptyTagGuards($guarded);
    }

    /**
     * Removes the extra content added during the parse stage
     * in order to preserve content we don't want stripped
     * out by HTMLTidy.
     * @param string $html The html with guard tags present.
     * @return string The html with guard tags removed.
     */
    private function stripGuards($html)
    {
        // Empty tag guards come out first, then the textarea whitespace guards.
        $unguarded = $this->stripEmptyTagGuards($html);
        return $this->stripTextareaWhitespaceGuards($unguarded);
    }

    /** * HTML tidy strips out empty tags such as