<?php
namespace Coast;
use Coast\Http;
use DOMDocument;
use DOMXPath;
/**
 * Mutable URL value object.
 *
 * Holds the individual URL components (scheme, user, pass, host, port, path,
 * query parameters, fragment) and converts between string, array and
 * component form. Every component accessor doubles as getter and setter:
 * called without arguments it returns the current value, called with an
 * argument it stores the value and returns $this for chaining.
 */
class Url
{
    /**
     * Positional indexes of the components, in the order toArray() emits
     * them. Used as the $part argument of toPart().
     */
    const PART_SCHEME   = 0;
    const PART_USER     = 1;
    const PART_PASS     = 2;
    const PART_HOST     = 3;
    const PART_PORT     = 4;
    const PART_PATH     = 5;
    const PART_QUERY    = 6;
    const PART_FRAGMENT = 7;

    protected $_scheme;
    protected $_user;
    protected $_pass;
    protected $_host;
    protected $_port;
    // \Coast\Path instance, or null when no path has been set.
    protected $_path;
    // Decoded query parameters, keyed by parameter name.
    protected $_queryParams = [];
    protected $_fragment;

    /**
     * @param string|array|null $value URL string, array of parts (see
     *                                 fromArray()), or null for an empty URL.
     */
    public function __construct($value = null)
    {
        if (!isset($value)) {
            return;
        }
        if (is_array($value)) {
            $this->fromArray($value);
        } else {
            $this->fromString($value);
        }
    }

    /**
     * Populates this URL from a string using parse_url().
     *
     * Only the components present in the string are written; components that
     * are absent keep whatever value they had before the call.
     *
     * @param  string $value
     * @return $this
     * @throws \InvalidArgumentException When parse_url() rejects the string.
     */
    public function fromString($value)
    {
        $parts = parse_url($value);
        if ($parts === false) {
            // parse_url() signals a seriously malformed URL with false, which
            // must not reach the array-typed fromArray().
            throw new \InvalidArgumentException("URL '{$value}' is malformed and could not be parsed");
        }
        $this->fromArray($parts);
        return $this;
    }

    /**
     * Renders the URL as a string.
     *
     * A URL consisting of nothing but a path is returned as that path's
     * name(). Everything else is assembled by http_build_url() and then
     * adjusted for the forms it does not produce directly.
     *
     * NOTE(review): http_build_url() is not defined in this file; presumably
     * it comes from the pecl_http 1.x extension or a userland polyfill.
     *
     * @return string
     */
    public function toString()
    {
        $hasAuthority = isset($this->_user)
            || isset($this->_pass)
            || isset($this->_host)
            || isset($this->_port);
        $hasLocation = isset($this->_scheme) || $hasAuthority;
        $hasQuery    = count($this->_queryParams) > 0;

        // Path-only URL: no builder involved.
        if (!$hasLocation && !$hasQuery && !isset($this->_fragment) && isset($this->_path)) {
            return $this->_path->name();
        }

        $string = http_build_url($this->toArray());
        // These schemes are written without an authority marker ("mailto:x").
        $string = preg_replace('/^(mailto|tel|data):\/{3}/', '$1:', $string);

        if (!isset($this->_scheme) && isset($this->_host)) {
            // Scheme-less URL with a host: emit as a network-path reference.
            $string = '//' . $string;
        } elseif (!$hasLocation
            && (!isset($this->_path) || substr($this->_path->name(), 0, 1) !== '/')) {
            // Relative reference whose path is not rooted: strip leading slashes.
            $string = ltrim($string, '/');
        } elseif (!isset($this->_path) && !isset($this->_fragment) && !$hasQuery) {
            // Nothing follows the authority: strip trailing slashes.
            $string = rtrim($string, '/');
        }

        return $string;
    }

    /**
     * Populates this URL from an array of parts.
     *
     * Recognised keys are scheme, user, pass, host, port, path, query,
     * queryParams and fragment; any other key is ignored. Each recognised
     * key is forwarded to the accessor of the same name, in the order the
     * keys appear in $parts.
     *
     * @param  array $parts
     * @return $this
     */
    public function fromArray(array $parts)
    {
        // array_intersect_key() matches on KEYS, so the whitelist has to be
        // flipped to have the part names as keys; a plain list would have
        // integer keys and filter every part out.
        $parts = array_intersect_key($parts, array_flip([
            'scheme',
            'user',
            'pass',
            'host',
            'port',
            'path',
            'query',
            'queryParams',
            'fragment',
        ]));
        foreach ($parts as $method => $value) {
            if ($method === 'path' && $value === null) {
                // toArray() reports an unset path as null. path(null) would
                // wrap it in an empty Path object, so keep it unset here to
                // let toArray()/fromArray() round-trip.
                $this->_path = null;
                continue;
            }
            $this->{$method}($value);
        }
        return $this;
    }

    /**
     * Returns all components keyed by name, in PART_* order.
     *
     * Unset components are null; 'path' is the Path object (or null) and
     * 'query' is the encoded query string (or null).
     *
     * @return array
     */
    public function toArray()
    {
        return [
            'scheme'   => $this->scheme(),
            'user'     => $this->user(),
            'pass'     => $this->pass(),
            'host'     => $this->host(),
            'port'     => $this->port(),
            'path'     => $this->path(),
            'query'    => $this->query(),
            'fragment' => $this->fragment(),
        ];
    }

    /**
     * Builds a new Url from a leading or trailing slice of the components.
     *
     * @param  int  $part    One of the PART_* constants.
     * @param  bool $reverse false: components from the scheme up to and
     *                       including $part; true: components from $part to
     *                       the fragment.
     * @return Url
     */
    public function toPart($part, $reverse = false)
    {
        $all = $this->toArray();
        $slice = $reverse
            ? array_slice($all, $part)
            : array_slice($all, 0, $part + 1);
        return new Url($slice);
    }

    /**
     * @return string Same as toString().
     */
    public function __toString()
    {
        return $this->toString();
    }

    /**
     * Gets or sets the scheme.
     *
     * @param  string|null $scheme
     * @return $this|string|null
     */
    public function scheme($scheme = null)
    {
        if (func_num_args() > 0) {
            $this->_scheme = $scheme;
            return $this;
        }
        return $this->_scheme;
    }

    /**
     * @return bool True when the scheme is http or https (case-insensitive).
     */
    public function isHttp()
    {
        // Cast first: the scheme may be null and strtolower(null) is deprecated.
        $scheme = strtolower((string) $this->scheme());
        return $scheme === 'http' || $scheme === 'https';
    }

    /**
     * @return bool True when the scheme is https (case-insensitive).
     */
    public function isHttps()
    {
        return strtolower((string) $this->scheme()) === 'https';
    }

    /**
     * Gets or sets the user.
     *
     * @param  string|null $user
     * @return $this|string|null
     */
    public function user($user = null)
    {
        if (func_num_args() > 0) {
            $this->_user = $user;
            return $this;
        }
        return $this->_user;
    }

    /**
     * Gets or sets the password.
     *
     * @param  string|null $pass
     * @return $this|string|null
     */
    public function pass($pass = null)
    {
        if (func_num_args() > 0) {
            $this->_pass = $pass;
            return $this;
        }
        return $this->_pass;
    }

    /**
     * Gets or sets the host.
     *
     * @param  string|null $host
     * @return $this|string|null
     */
    public function host($host = null)
    {
        if (func_num_args() > 0) {
            $this->_host = $host;
            return $this;
        }
        return $this->_host;
    }

    /**
     * Gets or sets the port.
     *
     * @param  int|string|null $port
     * @return $this|int|string|null
     */
    public function port($port = null)
    {
        if (func_num_args() > 0) {
            $this->_port = $port;
            return $this;
        }
        return $this->_port;
    }

    /**
     * Gets or sets the path.
     *
     * When setting, anything that is not already a \Coast\Path is converted
     * to a string and wrapped in one (so null becomes an empty Path).
     *
     * @param  \Coast\Path|string|null $path
     * @return $this|\Coast\Path|null
     */
    public function path($path = null)
    {
        if (func_num_args() > 0) {
            $this->_path = $path instanceof \Coast\Path
                ? $path
                : new \Coast\Path("{$path}");
            return $this;
        }
        return $this->_path;
    }

    /**
     * Gets, sets or removes a single query parameter.
     *
     * With one argument the parameter's value (or null) is returned. With
     * two arguments the parameter is stored, or removed when $value is null.
     *
     * @param  string $name
     * @param  mixed  $value
     * @return $this|mixed
     */
    public function queryParam($name, $value = null)
    {
        if (func_num_args() > 1) {
            if (isset($value)) {
                $this->_queryParams[$name] = $value;
            } else {
                unset($this->_queryParams[$name]);
            }
            return $this;
        }
        return isset($this->_queryParams[$name])
            ? $this->_queryParams[$name]
            : null;
    }

    /**
     * Gets or updates the query parameters.
     *
     * Passing an array merges it into the existing parameters via
     * queryParam(), so null values remove their key; it does not replace the
     * whole set. Passing null clears all parameters.
     *
     * @param  array|null $querys
     * @return $this|array
     */
    public function queryParams(array $querys = null)
    {
        if (func_num_args() > 0) {
            if (isset($querys)) {
                foreach ($querys as $name => $value) {
                    $this->queryParam($name, $value);
                }
            } else {
                $this->_queryParams = [];
            }
            return $this;
        }
        return $this->_queryParams;
    }

    /**
     * Gets or updates the query string.
     *
     * When setting, the string is decoded with parse_str() and merged into
     * the existing parameters (see queryParams()); null clears them. When
     * getting, the parameters are encoded per RFC 3986, or null is returned
     * if there are none.
     *
     * @param  string|null $query
     * @return $this|string|null
     */
    public function query($query = null)
    {
        if (func_num_args() > 0) {
            if (isset($query)) {
                parse_str($query, $params);
                $this->queryParams($params);
            } else {
                $this->_queryParams = [];
            }
            return $this;
        }
        if (!count($this->_queryParams)) {
            return null;
        }
        // Work on a copy: every leaf is stringified before encoding.
        $queryParams = $this->_queryParams;
        array_walk_recursive($queryParams, function (&$v) {
            $v = (string) $v;
        });
        return http_build_query($queryParams, '', '&', PHP_QUERY_RFC3986);
    }

    /**
     * Gets or sets the fragment.
     *
     * @param  string|null $fragment
     * @return $this|string|null
     */
    public function fragment($fragment = null)
    {
        if (func_num_args() > 0) {
            $this->_fragment = $fragment;
            return $this;
        }
        return $this->_fragment;
    }

    /**
     * @return bool True when a scheme is set.
     */
    public function isAbsolute()
    {
        return isset($this->_scheme);
    }

    /**
     * @return bool True when no scheme is set.
     */
    public function isRelative()
    {
        return !$this->isAbsolute();
    }

    /**
     * Resolves this relative URL against an absolute base URL.
     *
     * Components are taken from the base until the first component this URL
     * defines; from there on they are taken from this URL. A relative path
     * is resolved against the base path.
     *
     * @param  Url $base
     * @return Url New instance; neither operand is modified.
     * @throws \Exception When this URL is not relative or $base is not absolute.
     */
    public function toAbsolute(Url $base)
    {
        if (!$this->isRelative() || !$base->isAbsolute()) {
            throw new \Exception("URL '{$this}' is not relative or base URL '{$base}' is not absolute");
        }

        $current = $this->toArray();
        $base    = $base->toArray();

        // The authority (user, pass, host, port) is replaced as a unit, as in
        // RFC 3986 section 5.2.2: a reference such as "//other.com/x" must
        // not inherit the base URL's credentials.
        $authority    = ['user', 'pass', 'host', 'port'];
        $hasAuthority = false;
        foreach ($authority as $name) {
            if (isset($current[$name])) {
                $hasAuthority = true;
                break;
            }
        }

        $switch = false;
        $temp   = [];
        foreach ($base as $name => $value) {
            if (!$switch
                && (isset($current[$name]) || ($hasAuthority && in_array($name, $authority, true)))) {
                $switch = true;
            }
            if ($switch) {
                $value = $current[$name];
                // Either path may be unset (null), e.g. for "//host" or a
                // base without a path; only resolve when both exist.
                if ($name === 'path' && isset($value, $base[$name]) && $value->isRelative()) {
                    $value = $value->toAbsolute($base[$name]);
                }
            }
            $temp[$name] = $value;
        }

        return new Url($temp);
    }

    /**
     * Expresses this absolute URL relative to an absolute base URL.
     *
     * Leading components equal to the base's are dropped; from the first
     * differing component onwards this URL's components are kept. When
     * everything before the path was dropped, the path is made relative to
     * the base path.
     *
     * @param  Url $base
     * @return Url New instance; neither operand is modified.
     * @throws \Exception When either URL is not absolute.
     */
    public function toRelative(Url $base)
    {
        if (!$this->isAbsolute() || !$base->isAbsolute()) {
            throw new \Exception("URL '{$this}' is not absolute or base URL '{$base}' is not absolute");
        }

        $current = $this->toArray();
        $base    = $base->toArray();

        // Keep the whole authority as soon as any part of it differs, so the
        // result resolves back to this URL through toAbsolute().
        $authority        = ['user', 'pass', 'host', 'port'];
        $authorityDiffers = false;
        foreach ($authority as $name) {
            if ($current[$name] != $base[$name]) {
                $authorityDiffers = true;
                break;
            }
        }

        $switch = false;
        $temp   = [];
        foreach ($base as $name => $value) {
            // Loose comparison on purpose: Path objects compare by value and
            // a numeric port equals its string form.
            if (!$switch
                && ($current[$name] != $value
                    || ($authorityDiffers && in_array($name, $authority, true)))) {
                $switch = true;
            }
            if (!$switch) {
                $temp[$name] = null;
                continue;
            }
            $value = $current[$name];
            if ($name === 'path' && isset($value, $base[$name])) {
                $kept = array_filter($temp, function ($a) {
                    return isset($a);
                });
                // Only a URL sharing scheme and authority with the base can
                // carry a path relative to the base path.
                if (!$kept) {
                    $value = $value->toRelative($base[$name]);
                }
            }
            $temp[$name] = $value;
        }

        return new Url($temp);
    }

    /**
     * Looks up the canonical form of an http(s) URL by fetching it.
     *
     * Performs an HTTP request (5 second timeout). Non-http URLs and failed
     * requests yield a clone of this URL. Otherwise the response URL is
     * used, replaced by the first <link rel="canonical"> href or, failing
     * that, the first <meta property="og:url"> content found in an HTML
     * response. Relative values are resolved against the response URL.
     *
     * @return Url
     */
    public function toCanonical()
    {
        $url = clone $this;
        if (!$url->isHttp()) {
            return $url;
        }

        $http = new Http([
            'timeout' => 5,
        ]);
        $req = new Http\Request([
            'url' => $url,
        ]);
        $res = $http->execute($req);
        if (!$res->isSuccess()) {
            return $url;
        }

        // NOTE(review): presumably the final URL after redirects, as a Url
        // instance - confirm against Http\Response.
        $url = $res->url();
        // Cast: a missing header must not reach preg_match() as null.
        if (!preg_match('/^text\/html/i', (string) $res->header('content-type'))) {
            return $url;
        }

        $body = (string) $res->body();
        if ($body === '') {
            // DOMDocument::loadHTML() rejects empty input (ValueError on PHP 8).
            return $url;
        }

        $doc = new DOMDocument();
        // Silence markup warnings while parsing, then restore the caller's
        // libxml setting instead of forcing it off, and drop the buffered
        // errors so they do not accumulate.
        $previous = libxml_use_internal_errors(true);
        $result   = $doc->loadHTML($body);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$result) {
            return $url;
        }

        $types = [
            ['//link[@rel="canonical"]', 'href'],
            ['//meta[@property="og:url"]', 'content'],
        ];
        $xpath = new DOMXPath($doc);
        foreach ($types as $type) {
            $els = $xpath->query($type[0]);
            if (!$els->length) {
                continue;
            }
            $value = trim($els->item(0)->getAttribute($type[1]));
            if ($value === '') {
                // An empty attribute carries no canonical URL; try the next source.
                continue;
            }
            try {
                $temp = new Url($value);
            } catch (\InvalidArgumentException $e) {
                // Malformed value in the remote page; try the next source.
                continue;
            }
            if ($temp->isRelative()) {
                $temp = $temp->toAbsolute($url);
            }
            $url = $temp;
            break;
        }

        return $url;
    }
}