| 260 | | $opts = array( |
|---|
| 261 | | 'http' => array( |
|---|
| 262 | | 'timeout' => 3.5, |
|---|
| 263 | | 'method' => 'GET', |
|---|
| 264 | | 'user_agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12' |
|---|
| 265 | | ) |
|---|
| 266 | | ); |
|---|
| 267 | | $context = stream_context_create($opts); |
|---|
| 268 | | |
|---|
| 269 | | if ($page = fopen($url, 'r', false, $context)) { |
|---|
| 270 | | stream_set_timeout($page, 3.5); |
|---|
| 271 | | $data = stream_get_meta_data($page); |
|---|
| 272 | | foreach($data['wrapper_data'] as $header) { |
|---|
| 273 | | if (preg_match('/^Content-Type: ([^;]+)/', $header, $match) && |
|---|
| 274 | | !preg_match('#^(text/x?html|application/xhtml+xml)$#', $match[1])) { |
|---|
| 275 | | $title = $match[1]; |
|---|
| 276 | | } |
|---|
| 277 | | } |
|---|
| 278 | | if (!isset($title)) { |
|---|
| 279 | | $content = ''; |
|---|
| 280 | | $tstamp = time() + 5; |
|---|
| 281 | | |
|---|
| 282 | | while ($chunk = fread($page, 64)) { |
|---|
| 283 | | $data = stream_get_meta_data($page); |
|---|
| 284 | | if ($data['timed_out']) { |
|---|
| 285 | | $this->debug('Url Timed Out: ' . $url); |
|---|
| 286 | | $this->errorStatus = true; |
|---|
| 287 | | break; |
|---|
| 288 | | } |
|---|
| 289 | | $content .= $chunk; |
|---|
| 290 | | // Check for timeout |
|---|
| 291 | | if (time() > $tstamp) break; |
|---|
| 292 | | // Try to read title |
|---|
| 293 | | if (preg_match('#<title[^>]*>(.*)#is', $content, $m)) { |
|---|
| 294 | | // Start another loop to grab some more data in order to be sure we have the complete title |
|---|
| 295 | | $content = $m[1]; |
|---|
| 296 | | $loop = 2; |
|---|
| 297 | | while (($chunk = fread($page, 64)) && $loop-- && !strstr($content, '<')) { |
|---|
| 298 | | $content .= $chunk; |
|---|
| 299 | | // Check for timeout |
|---|
| 300 | | if (time() > $tstamp) break; |
|---|
| 301 | | } |
|---|
| 302 | | preg_match('#^([^<]*)#is', $content, $m); |
|---|
| 303 | | $title = preg_replace('#\s+#', ' ', $m[1]); |
|---|
| 304 | | $title = trim($this->decode($title, $this->titleLength)); |
|---|
| 305 | | break; |
|---|
| 306 | | } |
|---|
| 307 | | // Title won't appear beyond that point so stop parsing |
|---|
| 308 | | if (preg_match('#</head>|<body#i', $content)) { |
|---|
| 309 | | break; |
|---|
| 310 | | } |
|---|
| 311 | | } |
|---|
| 312 | | } |
|---|
| 313 | | fclose($page); |
|---|
| 314 | | } else if (!$this->errorStatus) { |
|---|
| 315 | | $this->debug('Couldn\t Open Url: ' . $url); |
|---|
| 316 | | } |
|---|
| 317 | | |
|---|
| 318 | | if (empty($title)) { |
|---|
| 319 | | if ($this->errorStatus) { |
|---|
| 320 | | if (!$this->showErrors || empty($this->errorMessage)) { |
|---|
| 321 | | continue; |
|---|
| 322 | | } |
|---|
| 323 | | $title = $this->errorMessage; |
|---|
| 324 | | $this->errorStatus = false; |
|---|
| 325 | | $this->errorMessage = null; |
|---|
| 326 | | } else { |
|---|
| 327 | | $title = 'No Title'; |
|---|
| 328 | | } |
|---|
| 329 | | } |
|---|
| | 262 | self::getTitle($url); |
|---|
| | 535 | |
|---|
/**
 * Returns the title of the given page
 *
 * Opens $url through a stream context (GET, 3.5 second timeout, browser-like
 * user agent). If the last Content-Type response header names something
 * other than HTML/XHTML, that content type is used as the title. Otherwise
 * the body is read in 64-byte chunks until a <title> element is found, the
 * end of the head / start of the body is reached, the stream times out, or
 * 5 seconds of wall-clock time have elapsed.
 *
 * Reads and may modify $this->errorStatus and $this->errorMessage; logs
 * through $this->debug().
 *
 * @param string $url url to the page
 * @return string|null title; 'No Title' when none was found and no error is
 *                     flagged; the stored error message when an error is
 *                     flagged and errors are shown; null when an error is
 *                     flagged but must not be shown (caller should skip the url)
 */
public function getTitle($url)
{
    $opts = array(
        'http' => array(
            'timeout' => 3.5,
            'method' => 'GET',
            'user_agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12'
        )
    );
    $context = stream_context_create($opts);
    $title = null;

    if ($page = fopen($url, 'r', false, $context)) {
        // stream_set_timeout() takes whole seconds plus microseconds; a float
        // second count would be truncated to 3.
        stream_set_timeout($page, 3, 500000);
        $data = stream_get_meta_data($page);

        // Only wrappers such as http provide 'wrapper_data' (the response headers).
        $headers = (isset($data['wrapper_data']) && is_array($data['wrapper_data']))
            ? $data['wrapper_data']
            : array();

        // Keep the last Content-Type seen: when redirects were followed the
        // header list also contains the headers of the intermediate responses.
        $contentType = null;
        foreach ($headers as $header) {
            if (is_string($header) && preg_match('/^Content-Type:\s*([^;]+)/i', $header, $match)) {
                $contentType = trim($match[1]);
            }
        }

        // Non-(X)HTML documents have no <title>; report their content type instead.
        // The "+" must be escaped so that "application/xhtml+xml" matches literally.
        if ($contentType !== null && $contentType !== ''
            && !preg_match('#^(text/x?html|application/xhtml\+xml)$#i', $contentType)) {
            $title = $contentType;
        }

        if ($title === null) {
            $content = '';
            $tstamp = time() + 5;

            while (true) {
                $chunk = fread($page, 64);

                // Check the timeout flag before the empty-chunk test: a read
                // that timed out returns no data and would otherwise end the
                // loop without the error ever being recorded.
                $data = stream_get_meta_data($page);
                if ($data['timed_out']) {
                    $this->debug('Url Timed Out: ' . $url);
                    $this->errorStatus = true;
                    break;
                }

                // Explicit comparison so that a chunk consisting of "0" is not
                // mistaken for the end of the stream.
                if ($chunk === false || $chunk === '') {
                    break;
                }

                $content .= $chunk;

                // Check for timeout
                if (time() > $tstamp) {
                    break;
                }

                // Try to read title
                if (preg_match('#<title[^>]*>(.*)#is', $content, $m)) {
                    // Grab up to two more chunks in order to be sure we have
                    // the complete title (i.e. until the next tag starts)
                    $content = $m[1];
                    $loop = 2;
                    while ($loop-- > 0 && strpos($content, '<') === false) {
                        $chunk = fread($page, 64);
                        if ($chunk === false || $chunk === '') {
                            break;
                        }
                        $content .= $chunk;
                        // Check for timeout
                        if (time() > $tstamp) {
                            break;
                        }
                    }

                    // Keep everything up to the next tag and collapse whitespace
                    preg_match('#^([^<]*)#is', $content, $m);
                    $title = preg_replace('#\s+#', ' ', $m[1]);
                    $title = trim($this->decode($title, $this->titleLength));
                    break;
                }

                // Title won't appear beyond that point so stop parsing
                if (preg_match('#</head>|<body#i', $content)) {
                    break;
                }
            }
        }
        fclose($page);
    } else if (!$this->errorStatus) {
        // NOTE(review): errorStatus is presumably set elsewhere (e.g. by an
        // error handler) when fopen() fails with a reported error - confirm.
        $this->debug('Couldn\'t Open Url: ' . $url);
    }

    if ($title === null || $title === '') {
        if ($this->errorStatus) {
            if (!$this->showErrors || empty($this->errorMessage)) {
                // This code used to live inside a loop and "continue"d here;
                // inside a method that is a fatal error, so signal "skip this
                // url" to the caller instead. The error state is left untouched,
                // as it was on the old "continue" path.
                return null;
            }
            $title = $this->errorMessage;
            $this->errorStatus = false;
            $this->errorMessage = null;
        } else {
            $title = 'No Title';
        }
    }

    return $title;
}
|---|