Automated build for v0.01
This commit is contained in:
160
vendor/andreskrey/Readability/Nodes/NodeUtility.php
vendored
Normal file
160
vendor/andreskrey/Readability/Nodes/NodeUtility.php
vendored
Normal file
@ -0,0 +1,160 @@
|
||||
<?php
|
||||
|
||||
namespace andreskrey\Readability\Nodes;
|
||||
|
||||
use andreskrey\Readability\Nodes\DOM\DOMDocument;
|
||||
use andreskrey\Readability\Nodes\DOM\DOMElement;
|
||||
use andreskrey\Readability\Nodes\DOM\DOMNode;
|
||||
|
||||
/**
|
||||
* Class NodeUtility.
|
||||
*/
|
||||
class NodeUtility
|
||||
{
|
||||
/**
|
||||
* Collection of regexps to check the node usability.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
public static $regexps = [
|
||||
'unlikelyCandidates' => '/-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
|
||||
'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
|
||||
'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
|
||||
'byline' => '/byline|author|dateline|writtenby|p-author/i',
|
||||
'replaceFonts' => '/<(\/?)font[^>]*>/gi',
|
||||
'normalize' => '/\s{2,}/',
|
||||
'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
|
||||
'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
|
||||
'prevLink' => '/(prev|earl|old|new|<|«)/i',
|
||||
'whitespace' => '/^\s*$/',
|
||||
'hasContent' => '/\S$/',
|
||||
'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
|
||||
'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
|
||||
// \x{00A0} is the unicode version of
|
||||
'onlyWhitespace' => '/\x{00A0}|\s+/u'
|
||||
];
|
||||
|
||||
/**
|
||||
* Imported from the Element class on league\html-to-markdown.
|
||||
*
|
||||
* @param $node
|
||||
*
|
||||
* @return DOMElement
|
||||
*/
|
||||
public static function nextElement($node)
|
||||
{
|
||||
$next = $node;
|
||||
while ($next
|
||||
&& $next->nodeType !== XML_ELEMENT_NODE
|
||||
&& $next->isWhitespace()) {
|
||||
$next = $next->nextSibling;
|
||||
}
|
||||
|
||||
return $next;
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
|
||||
* element with the new tag name and importing it to the main DOMDocument.
|
||||
*
|
||||
* @param DOMNode $node
|
||||
* @param string $value
|
||||
* @param bool $importAttributes
|
||||
*
|
||||
* @return DOMNode
|
||||
*/
|
||||
public static function setNodeTag($node, $value, $importAttributes = true)
|
||||
{
|
||||
$new = new DOMDocument('1.0', 'utf-8');
|
||||
$new->appendChild($new->createElement($value));
|
||||
|
||||
$children = $node->childNodes;
|
||||
/** @var $children \DOMNodeList $i */
|
||||
for ($i = 0; $i < $children->length; $i++) {
|
||||
$import = $new->importNode($children->item($i), true);
|
||||
$new->firstChild->appendChild($import);
|
||||
}
|
||||
|
||||
if ($importAttributes) {
|
||||
// Import attributes from the original node.
|
||||
foreach ($node->attributes as $attribute) {
|
||||
$new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue);
|
||||
}
|
||||
}
|
||||
|
||||
// The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement.
|
||||
$import = $node->ownerDocument->importNode($new->firstChild, true);
|
||||
$node->parentNode->replaceChild($import, $node);
|
||||
|
||||
return $import;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the current node and returns the next node to be parsed (child, sibling or parent).
|
||||
*
|
||||
* @param DOMNode $node
|
||||
*
|
||||
* @return DOMNode
|
||||
*/
|
||||
public static function removeAndGetNext($node)
|
||||
{
|
||||
$nextNode = self::getNextNode($node, true);
|
||||
$node->parentNode->removeChild($node);
|
||||
|
||||
return $nextNode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove the selected node.
|
||||
*
|
||||
* @param $node DOMElement
|
||||
*
|
||||
* @return void
|
||||
**/
|
||||
public static function removeNode($node)
|
||||
{
|
||||
$parent = $node->parentNode;
|
||||
if ($parent) {
|
||||
$parent->removeChild($node);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
|
||||
* for parents.
|
||||
*
|
||||
* @param DOMNode $originalNode
|
||||
* @param bool $ignoreSelfAndKids
|
||||
*
|
||||
* @return DOMNode
|
||||
*/
|
||||
public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
|
||||
{
|
||||
/*
|
||||
* Traverse the DOM from node to node, starting at the node passed in.
|
||||
* Pass true for the second parameter to indicate this node itself
|
||||
* (and its kids) are going away, and we want the next node over.
|
||||
*
|
||||
* Calling this in a loop will traverse the DOM depth-first.
|
||||
*/
|
||||
|
||||
// First check for kids if those aren't being ignored
|
||||
if (!$ignoreSelfAndKids && $originalNode->firstChild) {
|
||||
return $originalNode->firstChild;
|
||||
}
|
||||
|
||||
// Then for siblings...
|
||||
if ($originalNode->nextSibling) {
|
||||
return $originalNode->nextSibling;
|
||||
}
|
||||
|
||||
// And finally, move up the parent chain *and* find a sibling
|
||||
// (because this is depth-first traversal, we will have already
|
||||
// seen the parent nodes themselves).
|
||||
do {
|
||||
$originalNode = $originalNode->parentNode;
|
||||
} while ($originalNode && !$originalNode->nextSibling);
|
||||
|
||||
return ($originalNode) ? $originalNode->nextSibling : $originalNode;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user