550 lines
17 KiB
PHP
550 lines
17 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* base include file for SimpleTest
|
||
|
* @package SimpleTest
|
||
|
* @subpackage WebTester
|
||
|
* @version $Id: url.php 2011 2011-04-29 08:22:48Z pp11 $
|
||
|
*/
|
||
|
|
||
|
/**#@+
|
||
|
* include other SimpleTest class files
|
||
|
*/
|
||
|
require_once(dirname(__FILE__) . '/encoding.php');
|
||
|
/**#@-*/
|
||
|
|
||
|
/**
|
||
|
* URL parser to replace parse_url() PHP function which
|
||
|
* got broken in PHP 4.3.0. Adds some browser specific
|
||
|
* functionality such as expandomatics.
|
||
|
* Guesses a bit trying to separate the host from
|
||
|
* the path and tries to keep a raw, possibly unparsable,
|
||
|
* request string as long as possible.
|
||
|
* @package SimpleTest
|
||
|
* @subpackage WebTester
|
||
|
*/
|
||
|
class SimpleUrl {
|
||
|
private $scheme;
|
||
|
private $username;
|
||
|
private $password;
|
||
|
private $host;
|
||
|
private $port;
|
||
|
public $path;
|
||
|
private $request;
|
||
|
private $fragment;
|
||
|
private $x;
|
||
|
private $y;
|
||
|
private $target;
|
||
|
private $raw = false;
|
||
|
|
||
|
/**
|
||
|
* Constructor. Parses URL into sections.
|
||
|
* @param string $url Incoming URL.
|
||
|
* @access public
|
||
|
*/
|
||
|
function __construct($url = '') {
|
||
|
list($x, $y) = $this->chompCoordinates($url);
|
||
|
$this->setCoordinates($x, $y);
|
||
|
$this->scheme = $this->chompScheme($url);
|
||
|
if ($this->scheme === 'file') {
|
||
|
// Unescaped backslashes not used in directory separator context
|
||
|
// will get caught by this, but they should have been urlencoded
|
||
|
// anyway so we don't care. If this ends up being a problem, the
|
||
|
// host regexp must be modified to match for backslashes when
|
||
|
// the scheme is file.
|
||
|
$url = str_replace('\\', '/', $url);
|
||
|
}
|
||
|
list($this->username, $this->password) = $this->chompLogin($url);
|
||
|
$this->host = $this->chompHost($url);
|
||
|
$this->port = false;
|
||
|
if (preg_match('/(.*?):(.*)/', $this->host, $host_parts)) {
|
||
|
if ($this->scheme === 'file' && strlen($this->host) === 2) {
|
||
|
// DOS drive was placed in authority; promote it to path.
|
||
|
$url = '/' . $this->host . $url;
|
||
|
$this->host = false;
|
||
|
} else {
|
||
|
$this->host = $host_parts[1];
|
||
|
$this->port = (integer)$host_parts[2];
|
||
|
}
|
||
|
}
|
||
|
$this->path = $this->chompPath($url);
|
||
|
$this->request = $this->parseRequest($this->chompRequest($url));
|
||
|
$this->fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
|
||
|
$this->target = false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Extracts the X, Y coordinate pair from an image map.
|
||
|
* @param string $url URL so far. The coordinates will be
|
||
|
* removed.
|
||
|
* @return array X, Y as a pair of integers.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function chompCoordinates(&$url) {
|
||
|
if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
|
||
|
$url = $matches[1];
|
||
|
return array((integer)$matches[2], (integer)$matches[3]);
|
||
|
}
|
||
|
return array(false, false);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Extracts the scheme part of an incoming URL.
|
||
|
* @param string $url URL so far. The scheme will be
|
||
|
* removed.
|
||
|
* @return string Scheme part or false.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function chompScheme(&$url) {
|
||
|
if (preg_match('#^([^/:]*):(//)(.*)#', $url, $matches)) {
|
||
|
$url = $matches[2] . $matches[3];
|
||
|
return $matches[1];
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Extracts the username and password from the
|
||
|
* incoming URL. The // prefix will be reattached
|
||
|
* to the URL after the doublet is extracted.
|
||
|
* @param string $url URL so far. The username and
|
||
|
* password are removed.
|
||
|
* @return array Two item list of username and
|
||
|
* password. Will urldecode() them.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function chompLogin(&$url) {
|
||
|
$prefix = '';
|
||
|
if (preg_match('#^(//)(.*)#', $url, $matches)) {
|
||
|
$prefix = $matches[1];
|
||
|
$url = $matches[2];
|
||
|
}
|
||
|
if (preg_match('#^([^/]*)@(.*)#', $url, $matches)) {
|
||
|
$url = $prefix . $matches[2];
|
||
|
$parts = explode(":", $matches[1]);
|
||
|
return array(
|
||
|
urldecode($parts[0]),
|
||
|
isset($parts[1]) ? urldecode($parts[1]) : false);
|
||
|
}
|
||
|
$url = $prefix . $url;
|
||
|
return array(false, false);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Extracts the host part of an incoming URL.
|
||
|
* Includes the port number part. Will extract
|
||
|
* the host if it starts with // or it has
|
||
|
* a top level domain or it has at least two
|
||
|
* dots.
|
||
|
* @param string $url URL so far. The host will be
|
||
|
* removed.
|
||
|
* @return string Host part guess or false.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function chompHost(&$url) {
|
||
|
if (preg_match('!^(//)(.*?)(/.*|\?.*|#.*|$)!', $url, $matches)) {
|
||
|
$url = $matches[3];
|
||
|
return $matches[2];
|
||
|
}
|
||
|
if (preg_match('!(.*?)(\.\./|\./|/|\?|#|$)(.*)!', $url, $matches)) {
|
||
|
$tlds = SimpleUrl::getAllTopLevelDomains();
|
||
|
if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
|
||
|
$url = $matches[2] . $matches[3];
|
||
|
return $matches[1];
|
||
|
} elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
|
||
|
$url = $matches[2] . $matches[3];
|
||
|
return $matches[1];
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Extracts the path information from the incoming
|
||
|
* URL. Strips this path from the URL.
|
||
|
* @param string $url URL so far. The host will be
|
||
|
* removed.
|
||
|
* @return string Path part or '/'.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function chompPath(&$url) {
|
||
|
if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
|
||
|
$url = $matches[2] . $matches[3];
|
||
|
return ($matches[1] ? $matches[1] : '');
|
||
|
}
|
||
|
return '';
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Strips off the request data.
|
||
|
* @param string $url URL so far. The request will be
|
||
|
* removed.
|
||
|
* @return string Raw request part.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function chompRequest(&$url) {
|
||
|
if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
|
||
|
$url = $matches[2] . $matches[3];
|
||
|
return $matches[1];
|
||
|
}
|
||
|
return '';
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Breaks the request down into an object.
|
||
|
* @param string $raw Raw request.
|
||
|
* @return SimpleFormEncoding Parsed data.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function parseRequest($raw) {
|
||
|
$this->raw = $raw;
|
||
|
$request = new SimpleGetEncoding();
|
||
|
foreach (explode("&", $raw) as $pair) {
|
||
|
if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
|
||
|
$request->add(urldecode($matches[1]), urldecode($matches[2]));
|
||
|
} elseif ($pair) {
|
||
|
$request->add(urldecode($pair), '');
|
||
|
}
|
||
|
}
|
||
|
return $request;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for protocol part.
|
||
|
* @param string $default Value to use if not present.
|
||
|
* @return string Scheme name, e.g "http".
|
||
|
* @access public
|
||
|
*/
|
||
|
function getScheme($default = false) {
|
||
|
return $this->scheme ? $this->scheme : $default;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for user name.
|
||
|
* @return string Username preceding host.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getUsername() {
|
||
|
return $this->username;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for password.
|
||
|
* @return string Password preceding host.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getPassword() {
|
||
|
return $this->password;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for hostname and port.
|
||
|
* @param string $default Value to use if not present.
|
||
|
* @return string Hostname only.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getHost($default = false) {
|
||
|
return $this->host ? $this->host : $default;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for top level domain.
|
||
|
* @return string Last part of host.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getTld() {
|
||
|
$path_parts = pathinfo($this->getHost());
|
||
|
return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for port number.
|
||
|
* @return integer TCP/IP port number.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getPort() {
|
||
|
return $this->port;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for path.
|
||
|
* @return string Full path including leading slash if implied.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getPath() {
|
||
|
if (! $this->path && $this->host) {
|
||
|
return '/';
|
||
|
}
|
||
|
return $this->path;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for page if any. This may be a
|
||
|
* directory name if ambiguious.
|
||
|
* @return Page name.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getPage() {
|
||
|
if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
|
||
|
return false;
|
||
|
}
|
||
|
return $matches[1];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Gets the path to the page.
|
||
|
* @return string Path less the page.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getBasePath() {
|
||
|
if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
|
||
|
return false;
|
||
|
}
|
||
|
return $matches[1];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for fragment at end of URL after the "#".
|
||
|
* @return string Part after "#".
|
||
|
* @access public
|
||
|
*/
|
||
|
function getFragment() {
|
||
|
return $this->fragment;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Sets image coordinates. Set to false to clear
|
||
|
* them.
|
||
|
* @param integer $x Horizontal position.
|
||
|
* @param integer $y Vertical position.
|
||
|
* @access public
|
||
|
*/
|
||
|
function setCoordinates($x = false, $y = false) {
|
||
|
if (($x === false) || ($y === false)) {
|
||
|
$this->x = $this->y = false;
|
||
|
return;
|
||
|
}
|
||
|
$this->x = (integer)$x;
|
||
|
$this->y = (integer)$y;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for horizontal image coordinate.
|
||
|
* @return integer X value.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getX() {
|
||
|
return $this->x;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for vertical image coordinate.
|
||
|
* @return integer Y value.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getY() {
|
||
|
return $this->y;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Accessor for current request parameters
|
||
|
* in URL string form. Will return teh original request
|
||
|
* if at all possible even if it doesn't make much
|
||
|
* sense.
|
||
|
* @return string Form is string "?a=1&b=2", etc.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getEncodedRequest() {
|
||
|
if ($this->raw) {
|
||
|
$encoded = $this->raw;
|
||
|
} else {
|
||
|
$encoded = $this->request->asUrlRequest();
|
||
|
}
|
||
|
if ($encoded) {
|
||
|
return '?' . preg_replace('/^\?/', '', $encoded);
|
||
|
}
|
||
|
return '';
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Adds an additional parameter to the request.
|
||
|
* @param string $key Name of parameter.
|
||
|
* @param string $value Value as string.
|
||
|
* @access public
|
||
|
*/
|
||
|
function addRequestParameter($key, $value) {
|
||
|
$this->raw = false;
|
||
|
$this->request->add($key, $value);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Adds additional parameters to the request.
|
||
|
* @param hash/SimpleFormEncoding $parameters Additional
|
||
|
* parameters.
|
||
|
* @access public
|
||
|
*/
|
||
|
function addRequestParameters($parameters) {
|
||
|
$this->raw = false;
|
||
|
$this->request->merge($parameters);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Clears down all parameters.
|
||
|
* @access public
|
||
|
*/
|
||
|
function clearRequest() {
|
||
|
$this->raw = false;
|
||
|
$this->request = new SimpleGetEncoding();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Gets the frame target if present. Although
|
||
|
* not strictly part of the URL specification it
|
||
|
* acts as similarily to the browser.
|
||
|
* @return boolean/string Frame name or false if none.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getTarget() {
|
||
|
return $this->target;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Attaches a frame target.
|
||
|
* @param string $frame Name of frame.
|
||
|
* @access public
|
||
|
*/
|
||
|
function setTarget($frame) {
|
||
|
$this->raw = false;
|
||
|
$this->target = $frame;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Renders the URL back into a string.
|
||
|
* @return string URL in canonical form.
|
||
|
* @access public
|
||
|
*/
|
||
|
function asString() {
|
||
|
$path = $this->path;
|
||
|
$scheme = $identity = $host = $port = $encoded = $fragment = '';
|
||
|
if ($this->username && $this->password) {
|
||
|
$identity = $this->username . ':' . $this->password . '@';
|
||
|
}
|
||
|
if ($this->getHost()) {
|
||
|
$scheme = $this->getScheme() ? $this->getScheme() : 'http';
|
||
|
$scheme .= '://';
|
||
|
$host = $this->getHost();
|
||
|
} elseif ($this->getScheme() === 'file') {
|
||
|
// Safest way; otherwise, file URLs on Windows have an extra
|
||
|
// leading slash. It might be possible to convert file://
|
||
|
// URIs to local file paths, but that requires more research.
|
||
|
$scheme = 'file://';
|
||
|
}
|
||
|
if ($this->getPort() && $this->getPort() != 80 ) {
|
||
|
$port = ':'.$this->getPort();
|
||
|
}
|
||
|
|
||
|
if (substr($this->path, 0, 1) == '/') {
|
||
|
$path = $this->normalisePath($this->path);
|
||
|
}
|
||
|
$encoded = $this->getEncodedRequest();
|
||
|
$fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
|
||
|
$coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
|
||
|
return "$scheme$identity$host$port$path$encoded$fragment$coords";
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Replaces unknown sections to turn a relative
|
||
|
* URL into an absolute one. The base URL can
|
||
|
* be either a string or a SimpleUrl object.
|
||
|
* @param string/SimpleUrl $base Base URL.
|
||
|
* @access public
|
||
|
*/
|
||
|
function makeAbsolute($base) {
|
||
|
if (! is_object($base)) {
|
||
|
$base = new SimpleUrl($base);
|
||
|
}
|
||
|
if ($this->getHost()) {
|
||
|
$scheme = $this->getScheme();
|
||
|
$host = $this->getHost();
|
||
|
$port = $this->getPort() ? ':' . $this->getPort() : '';
|
||
|
$identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
|
||
|
if (! $identity) {
|
||
|
$identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
|
||
|
}
|
||
|
} else {
|
||
|
$scheme = $base->getScheme();
|
||
|
$host = $base->getHost();
|
||
|
$port = $base->getPort() ? ':' . $base->getPort() : '';
|
||
|
$identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
|
||
|
}
|
||
|
$path = $this->normalisePath($this->extractAbsolutePath($base));
|
||
|
$encoded = $this->getEncodedRequest();
|
||
|
$fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
|
||
|
$coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
|
||
|
return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Replaces unknown sections of the path with base parts
|
||
|
* to return a complete absolute one.
|
||
|
* @param string/SimpleUrl $base Base URL.
|
||
|
* @param string Absolute path.
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function extractAbsolutePath($base) {
|
||
|
if ($this->getHost()) {
|
||
|
return $this->path;
|
||
|
}
|
||
|
if (! $this->isRelativePath($this->path)) {
|
||
|
return $this->path;
|
||
|
}
|
||
|
if ($this->path) {
|
||
|
return $base->getBasePath() . $this->path;
|
||
|
}
|
||
|
return $base->getPath();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Simple test to see if a path part is relative.
|
||
|
* @param string $path Path to test.
|
||
|
* @return boolean True if starts with a "/".
|
||
|
* @access private
|
||
|
*/
|
||
|
protected function isRelativePath($path) {
|
||
|
return (substr($path, 0, 1) != '/');
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Extracts the username and password for use in rendering
|
||
|
* a URL.
|
||
|
* @return string/boolean Form of username:password or false.
|
||
|
* @access public
|
||
|
*/
|
||
|
function getIdentity() {
|
||
|
if ($this->username && $this->password) {
|
||
|
return $this->username . ':' . $this->password;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Replaces . and .. sections of the path.
|
||
|
* @param string $path Unoptimised path.
|
||
|
* @return string Path with dots removed if possible.
|
||
|
* @access public
|
||
|
*/
|
||
|
function normalisePath($path) {
|
||
|
$path = preg_replace('|/\./|', '/', $path);
|
||
|
return preg_replace('|/[^/]+/\.\./|', '/', $path);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* A pipe seperated list of all TLDs that result in two part
|
||
|
* domain names.
|
||
|
* @return string Pipe separated list.
|
||
|
* @access public
|
||
|
*/
|
||
|
static function getAllTopLevelDomains() {
|
||
|
return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
|
||
|
}
|
||
|
}
|
||
|
?>
|