Automated build for v0.01
This commit is contained in:
		
						commit
						791b998489
					
				
					 2771 changed files with 222096 additions and 0 deletions
				
			
		
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMAttr.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMAttr.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMAttr subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMAttr' by the constructor of this
 * namespace's DOMDocument.
 */
class DOMAttr extends \DOMAttr
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMCdataSection.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMCdataSection.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMCdataSection subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMCdataSection' by the constructor of
 * this namespace's DOMDocument.
 */
class DOMCdataSection extends \DOMCdataSection
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMCharacterData.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMCharacterData.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMCharacterData subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMCharacterData' by the constructor of
 * this namespace's DOMDocument.
 */
class DOMCharacterData extends \DOMCharacterData
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMComment.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMComment.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMComment subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMComment' by the constructor of this
 * namespace's DOMDocument.
 */
class DOMComment extends \DOMComment
{
    use NodeTrait;
}
							
								
								
									
										30
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMDocument.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMDocument.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMDocument subclass that mixes in NodeTrait and makes the document create
 * this namespace's node subclasses instead of the native DOM classes.
 */
class DOMDocument extends \DOMDocument
{
    use NodeTrait;

    /**
     * Builds the document and registers every extended node class.
     *
     * @param string $version  Passed through to \DOMDocument::__construct
     * @param string $encoding Passed through to \DOMDocument::__construct
     */
    public function __construct($version, $encoding)
    {
        parent::__construct($version, $encoding);

        // Native DOM class name => replacement class from this namespace.
        $nodeClasses = [
            'DOMAttr'                  => DOMAttr::class,
            'DOMCdataSection'          => DOMCdataSection::class,
            'DOMCharacterData'         => DOMCharacterData::class,
            'DOMComment'               => DOMComment::class,
            'DOMDocument'              => self::class,
            'DOMDocumentFragment'      => DOMDocumentFragment::class,
            'DOMDocumentType'          => DOMDocumentType::class,
            'DOMElement'               => DOMElement::class,
            'DOMEntity'                => DOMEntity::class,
            'DOMEntityReference'       => DOMEntityReference::class,
            'DOMNode'                  => DOMNode::class,
            'DOMNotation'              => DOMNotation::class,
            'DOMProcessingInstruction' => DOMProcessingInstruction::class,
            'DOMText'                  => DOMText::class,
        ];

        foreach ($nodeClasses as $nativeClass => $extendedClass) {
            $this->registerNodeClass($nativeClass, $extendedClass);
        }
    }
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMDocumentFragment.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMDocumentFragment.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMDocumentFragment subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMDocumentFragment' by the constructor
 * of this namespace's DOMDocument.
 */
class DOMDocumentFragment extends \DOMDocumentFragment
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMDocumentType.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMDocumentType.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMDocumentType subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMDocumentType' by the constructor of
 * this namespace's DOMDocument.
 */
class DOMDocumentType extends \DOMDocumentType
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMElement.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMElement.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMElement subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMElement' by the constructor of this
 * namespace's DOMDocument.
 */
class DOMElement extends \DOMElement
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMEntity.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMEntity.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMEntity subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMEntity' by the constructor of this
 * namespace's DOMDocument.
 */
class DOMEntity extends \DOMEntity
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMEntityReference.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMEntityReference.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMEntityReference subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMEntityReference' by the constructor of
 * this namespace's DOMDocument.
 */
class DOMEntityReference extends \DOMEntityReference
{
    use NodeTrait;
}
							
								
								
									
										14
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMNode.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMNode.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,14 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMNode subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMNode' by the constructor of this
 * namespace's DOMDocument. The @method tags below advertise the attribute
 * accessors that NodeTrait defines, since the native \DOMNode has none.
 *
 * @method getAttribute($attribute)
 * @method hasAttribute($attribute)
 */
class DOMNode extends \DOMNode
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMNotation.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMNotation.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMNotation subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMNotation' by the constructor of this
 * namespace's DOMDocument.
 */
class DOMNotation extends \DOMNotation
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMProcessingInstruction.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMProcessingInstruction.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMProcessingInstruction subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMProcessingInstruction' by the
 * constructor of this namespace's DOMDocument.
 */
class DOMProcessingInstruction extends \DOMProcessingInstruction
{
    use NodeTrait;
}
							
								
								
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMText.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								vendor/andreskrey/Readability/Nodes/DOM/DOMText.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes\DOM; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\NodeTrait; | ||||
| 
 | ||||
/**
 * \DOMText subclass that mixes in NodeTrait.
 *
 * Registered as the node class for 'DOMText' by the constructor of this
 * namespace's DOMDocument.
 */
class DOMText extends \DOMText
{
    use NodeTrait;
}
							
								
								
									
										560
									
								
								vendor/andreskrey/Readability/Nodes/NodeTrait.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										560
									
								
								vendor/andreskrey/Readability/Nodes/NodeTrait.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,560 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\DOM\DOMDocument; | ||||
| use andreskrey\Readability\Nodes\DOM\DOMElement; | ||||
| use andreskrey\Readability\Nodes\DOM\DOMNode; | ||||
| use andreskrey\Readability\Nodes\DOM\DOMText; | ||||
| use DOMNodeList; | ||||
| 
 | ||||
| /** | ||||
|  * @method \DOMNode removeAttribute($name) | ||||
|  */ | ||||
| trait NodeTrait | ||||
| { | ||||
    /**
     * Content score of the node. Used to determine the value of the content.
     * Assigned by initializeNode() from a per-tag base score plus, when
     * requested, the class/id weight.
     *
     * @var int
     */
    public $contentScore = 0;

    /**
     * Flag for initialized status. Set to true by initializeNode() once the
     * content score has been computed; exposed through isInitialized().
     *
     * @var bool
     */
    private $initialized = false;

    /**
     * Flag data tables.
     *
     * NOTE(review): in the part of the trait visible here, the accessors
     * isReadabilityDataTable()/setReadabilityDataTable() use a DOM attribute
     * instead of this property (their property-based lines are commented out).
     *
     * @var bool
     */
    private $readabilityDataTable = false;

    /**
     * Tag names that hasSingleChildBlockElement() treats as block elements.
     *
     * @var array
     */
    private $divToPElements = [
        'a',
        'blockquote',
        'dl',
        'div',
        'img',
        'ol',
        'p',
        'pre',
        'table',
        'ul',
        'select',
    ];

    /**
     * Tag names that isPhrasingContent() accepts as phrasing content.
     *
     * The commented out elements qualify as phrasing content but tend to be
     * removed by readability when put into paragraphs, so we ignore them here.
     *
     * @var array
     */
    private $phrasing_elems = [
        // 'CANVAS', 'IFRAME', 'SVG', 'VIDEO',
        'abbr', 'audio', 'b', 'bdo', 'br', 'button', 'cite', 'code', 'data',
        'datalist', 'dfn', 'em', 'embed', 'i', 'img', 'input', 'kbd', 'label',
        'mark', 'math', 'meter', 'noscript', 'object', 'output', 'progress', 'q',
        'ruby', 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub',
        'sup', 'textarea', 'time', 'var', 'wbr'
    ];
| 
 | ||||
    /**
     * Getter for the initialized flag.
     *
     * The flag starts as false and is switched to true by initializeNode()
     * after the node's content score has been computed.
     *
     * @return bool
     */
    public function isInitialized()
    {
        return $this->initialized;
    }
| 
 | ||||
|     /** | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function isReadabilityDataTable() | ||||
|     { | ||||
|         /* | ||||
|          * This is a workaround that I'd like to remove in the future. | ||||
|          * Seems that although we are extending the base DOMElement and adding custom properties (like this one, | ||||
|          * 'readabilityDataTable'), these properties get lost when you search for elements with getElementsByTagName. | ||||
|          * This means that even if we mark the tables in a previous step, when we want to retrieve that information, | ||||
|          * all the custom properties are in their default values. Somehow we need to find a way to make these properties | ||||
|          * permanent across the whole DOM. | ||||
|          * | ||||
|          * @see https://stackoverflow.com/questions/35654709/php-registernodeclass-and-reusing-variable-names | ||||
|          */ | ||||
|         return $this->hasAttribute('readabilityDataTable') | ||||
|             && $this->getAttribute('readabilityDataTable') === '1'; | ||||
| //        return $this->readabilityDataTable;
 | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * @param bool $param | ||||
|      */ | ||||
|     public function setReadabilityDataTable($param) | ||||
|     { | ||||
|         // Can't be "true" because DOMDocument casts it to "1"
 | ||||
|         $this->setAttribute('readabilityDataTable', $param ? '1' : '0'); | ||||
| //        $this->readabilityDataTable = $param;
 | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Initializer. Calculates the current score of the node and returns a full Readability object. | ||||
|      * | ||||
|      * @ TODO: I don't like the weightClasses param. How can we get the config here? | ||||
|      * | ||||
|      * @param $weightClasses bool Weight classes? | ||||
|      * | ||||
|      * @return static | ||||
|      */ | ||||
|     public function initializeNode($weightClasses) | ||||
|     { | ||||
|         if (!$this->isInitialized()) { | ||||
|             $contentScore = 0; | ||||
| 
 | ||||
|             switch ($this->nodeName) { | ||||
|                 case 'div': | ||||
|                     $contentScore += 5; | ||||
|                     break; | ||||
| 
 | ||||
|                 case 'pre': | ||||
|                 case 'td': | ||||
|                 case 'blockquote': | ||||
|                     $contentScore += 3; | ||||
|                     break; | ||||
| 
 | ||||
|                 case 'address': | ||||
|                 case 'ol': | ||||
|                 case 'ul': | ||||
|                 case 'dl': | ||||
|                 case 'dd': | ||||
|                 case 'dt': | ||||
|                 case 'li': | ||||
|                 case 'form': | ||||
|                     $contentScore -= 3; | ||||
|                     break; | ||||
| 
 | ||||
|                 case 'h1': | ||||
|                 case 'h2': | ||||
|                 case 'h3': | ||||
|                 case 'h4': | ||||
|                 case 'h5': | ||||
|                 case 'h6': | ||||
|                 case 'th': | ||||
|                     $contentScore -= 5; | ||||
|                     break; | ||||
|             } | ||||
| 
 | ||||
|             $this->contentScore = $contentScore + ($weightClasses ? $this->getClassWeight() : 0); | ||||
| 
 | ||||
|             $this->initialized = true; | ||||
|         } | ||||
| 
 | ||||
|         return $this; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Override for native getAttribute method. Some nodes have the getAttribute method, some don't, so we need | ||||
|      * to check first the existence of the attributes property. | ||||
|      * | ||||
|      * @param $attributeName string Attribute to retrieve | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     public function getAttribute($attributeName) | ||||
|     { | ||||
|         if (!is_null($this->attributes)) { | ||||
|             return parent::getAttribute($attributeName); | ||||
|         } | ||||
| 
 | ||||
|         return ''; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Override for native hasAttribute. | ||||
|      * | ||||
|      * @see getAttribute | ||||
|      * | ||||
|      * @param $attributeName | ||||
|      * | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function hasAttribute($attributeName) | ||||
|     { | ||||
|         if (!is_null($this->attributes)) { | ||||
|             return parent::hasAttribute($attributeName); | ||||
|         } | ||||
| 
 | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Get the ancestors of the current node. | ||||
|      * | ||||
|      * @param int|bool $maxLevel Max amount of ancestors to get. False for all of them | ||||
|      * | ||||
|      * @return array | ||||
|      */ | ||||
|     public function getNodeAncestors($maxLevel = 3) | ||||
|     { | ||||
|         $ancestors = []; | ||||
|         $level = 0; | ||||
| 
 | ||||
|         $node = $this->parentNode; | ||||
| 
 | ||||
|         while ($node && !($node instanceof DOMDocument)) { | ||||
|             $ancestors[] = $node; | ||||
|             $level++; | ||||
|             if ($level === $maxLevel) { | ||||
|                 break; | ||||
|             } | ||||
|             $node = $node->parentNode; | ||||
|         } | ||||
| 
 | ||||
|         return $ancestors; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Returns all links from the current element. | ||||
|      * | ||||
|      * @return array | ||||
|      */ | ||||
|     public function getAllLinks() | ||||
|     { | ||||
|         return iterator_to_array($this->getElementsByTagName('a')); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Get the density of links as a percentage of the content | ||||
|      * This is the amount of text that is inside a link divided by the total text in the node. | ||||
|      * | ||||
|      * @return int | ||||
|      */ | ||||
|     public function getLinkDensity() | ||||
|     { | ||||
|         $linkLength = 0; | ||||
|         $textLength = mb_strlen($this->getTextContent(true)); | ||||
| 
 | ||||
|         if (!$textLength) { | ||||
|             return 0; | ||||
|         } | ||||
| 
 | ||||
|         $links = $this->getAllLinks(); | ||||
| 
 | ||||
|         if ($links) { | ||||
|             /** @var DOMElement $link */ | ||||
|             foreach ($links as $link) { | ||||
|                 $linkLength += mb_strlen($link->getTextContent(true)); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         return $linkLength / $textLength; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Calculates the weight of the class/id of the current element. | ||||
|      * | ||||
|      * @return int | ||||
|      */ | ||||
|     public function getClassWeight() | ||||
|     { | ||||
|         $weight = 0; | ||||
| 
 | ||||
|         // Look for a special classname
 | ||||
|         $class = $this->getAttribute('class'); | ||||
|         if (trim($class)) { | ||||
|             if (preg_match(NodeUtility::$regexps['negative'], $class)) { | ||||
|                 $weight -= 25; | ||||
|             } | ||||
| 
 | ||||
|             if (preg_match(NodeUtility::$regexps['positive'], $class)) { | ||||
|                 $weight += 25; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         // Look for a special ID
 | ||||
|         $id = $this->getAttribute('id'); | ||||
|         if (trim($id)) { | ||||
|             if (preg_match(NodeUtility::$regexps['negative'], $id)) { | ||||
|                 $weight -= 25; | ||||
|             } | ||||
| 
 | ||||
|             if (preg_match(NodeUtility::$regexps['positive'], $id)) { | ||||
|                 $weight += 25; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         return $weight; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Returns the full text of the node. | ||||
|      * | ||||
|      * @param bool $normalize Normalize white space? | ||||
|      * | ||||
|      * @return string | ||||
|      */ | ||||
|     public function getTextContent($normalize = false) | ||||
|     { | ||||
|         $nodeValue = $this->nodeValue; | ||||
|         if ($normalize) { | ||||
|             $nodeValue = trim(preg_replace('/\s{2,}/', ' ', $nodeValue)); | ||||
|         } | ||||
| 
 | ||||
|         return $nodeValue; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Returns the children of the current node. | ||||
|      * | ||||
|      * @param bool $filterEmptyDOMText Filter empty DOMText nodes? | ||||
|      * | ||||
|      * @return array | ||||
|      */ | ||||
|     public function getChildren($filterEmptyDOMText = false) | ||||
|     { | ||||
|         $ret = iterator_to_array($this->childNodes); | ||||
|         if ($filterEmptyDOMText) { | ||||
|             // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
 | ||||
|             $ret = array_values(array_filter($ret, function ($node) { | ||||
|                 return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue)); | ||||
|             })); | ||||
|         } | ||||
| 
 | ||||
|         return $ret; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Return an array indicating how many rows and columns this table has. | ||||
|      * | ||||
|      * @return array | ||||
|      */ | ||||
|     public function getRowAndColumnCount() | ||||
|     { | ||||
|         $rows = $columns = 0; | ||||
|         $trs = $this->getElementsByTagName('tr'); | ||||
|         foreach ($trs as $tr) { | ||||
|             /** @var \DOMElement $tr */ | ||||
|             $rowspan = $tr->getAttribute('rowspan'); | ||||
|             $rows += ($rowspan || 1); | ||||
| 
 | ||||
|             // Now look for column-related info
 | ||||
|             $columnsInThisRow = 0; | ||||
|             $cells = $tr->getElementsByTagName('td'); | ||||
|             foreach ($cells as $cell) { | ||||
|                 /** @var \DOMElement $cell */ | ||||
|                 $colspan = $cell->getAttribute('colspan'); | ||||
|                 $columnsInThisRow += ($colspan || 1); | ||||
|             } | ||||
|             $columns = max($columns, $columnsInThisRow); | ||||
|         } | ||||
| 
 | ||||
|         return ['rows' => $rows, 'columns' => $columns]; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Creates a new node based on the text content of the original node. | ||||
|      * | ||||
|      * @param $originalNode DOMNode | ||||
|      * @param $tagName string | ||||
|      * | ||||
|      * @return DOMElement | ||||
|      */ | ||||
|     public function createNode($originalNode, $tagName) | ||||
|     { | ||||
|         $text = $originalNode->getTextContent(); | ||||
|         $newNode = $originalNode->ownerDocument->createElement($tagName, $text); | ||||
| 
 | ||||
|         return $newNode; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Check if a given node has one of its ancestor tag name matching the | ||||
|      * provided one. | ||||
|      * | ||||
|      * @param string $tagName | ||||
|      * @param int $maxDepth | ||||
|      * @param callable $filterFn | ||||
|      * | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function hasAncestorTag($tagName, $maxDepth = 3, callable $filterFn = null) | ||||
|     { | ||||
|         $depth = 0; | ||||
|         $node = $this; | ||||
| 
 | ||||
|         while ($node->parentNode) { | ||||
|             if ($maxDepth > 0 && $depth > $maxDepth) { | ||||
|                 return false; | ||||
|             } | ||||
| 
 | ||||
|             if ($node->parentNode->nodeName === $tagName && (!$filterFn || $filterFn($node->parentNode))) { | ||||
|                 return true; | ||||
|             } | ||||
| 
 | ||||
|             $node = $node->parentNode; | ||||
|             $depth++; | ||||
|         } | ||||
| 
 | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Check if this node has only whitespace and a single element with given tag | ||||
|      * or if it contains no element with given tag or more than 1 element. | ||||
|      * | ||||
|      * @param $tag string Name of tag | ||||
|      * | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function hasSingleTagInsideElement($tag) | ||||
|     { | ||||
|         // There should be exactly 1 element child with given tag
 | ||||
|         if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) { | ||||
|             return false; | ||||
|         } | ||||
| 
 | ||||
|         // And there should be no text nodes with real content
 | ||||
|         return array_reduce($children, function ($carry, $child) { | ||||
|             if (!$carry === false) { | ||||
|                 return false; | ||||
|             } | ||||
| 
 | ||||
|             /* @var DOMNode $child */ | ||||
|             return !($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent())); | ||||
|         }); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Check if the current element has a single child block element. | ||||
|      * Block elements are the ones defined in the divToPElements array. | ||||
|      * | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function hasSingleChildBlockElement() | ||||
|     { | ||||
|         $result = false; | ||||
|         if ($this->hasChildNodes()) { | ||||
|             foreach ($this->getChildren() as $child) { | ||||
|                 if (in_array($child->nodeName, $this->divToPElements)) { | ||||
|                     $result = true; | ||||
|                 } else { | ||||
|                     // If any of the hasSingleChildBlockElement calls return true, return true then.
 | ||||
|                     /** @var $child DOMElement */ | ||||
|                     $result = ($result || $child->hasSingleChildBlockElement()); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         return $result; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Determines if a node has no content or it is just a bunch of dividing lines and/or whitespace. | ||||
|      * | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function isElementWithoutContent() | ||||
|     { | ||||
|         return $this instanceof DOMElement && | ||||
|             mb_strlen(preg_replace(NodeUtility::$regexps['onlyWhitespace'], '', $this->textContent)) === 0 && | ||||
|             ($this->childNodes->length === 0 || | ||||
|                 $this->childNodes->length === $this->getElementsByTagName('br')->length + $this->getElementsByTagName('hr')->length | ||||
|                 /* | ||||
|                  * Special PHP DOMDocument case: We also need to count how many DOMText we have inside the node. | ||||
|                  * If there's an empty tag with an space inside and a BR (for example "<p> <br/></p>) counting only BRs and
 | ||||
|                  * HRs will will say that the example has 2 nodes, instead of one. This happens because in DOMDocument, | ||||
|                  * DOMTexts are also nodes (which doesn't happen in JS). So we need to also count how many DOMText we | ||||
|                  * are dealing with (And at this point we know they are empty or are just whitespace, because of the | ||||
|                  * mb_strlen in this chain of checks). | ||||
|                  */ | ||||
|                 + count(array_filter(iterator_to_array($this->childNodes), function ($child) { | ||||
|                     return $child instanceof DOMText; | ||||
|                 })) | ||||
| 
 | ||||
|             ); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Determine if a node qualifies as phrasing content. | ||||
|      * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content.
 | ||||
|      * | ||||
|      * @return bool | ||||
|      */ | ||||
|     public function isPhrasingContent() | ||||
|     { | ||||
|         return $this->nodeType === XML_TEXT_NODE || in_array($this->nodeName, $this->phrasing_elems) !== false || | ||||
|             (!is_null($this->childNodes) && | ||||
|                 ($this->nodeName === 'a' || $this->nodeName === 'del' || $this->nodeName === 'ins') && | ||||
|                 array_reduce(iterator_to_array($this->childNodes), function ($carry, $node) { | ||||
|                     return $node->isPhrasingContent() && $carry; | ||||
|                 }, true) | ||||
|             ); | ||||
|     } | ||||
| 
 | ||||
|     public function isProbablyVisible() | ||||
|     { | ||||
|         /* | ||||
|          * In the original JS project they check if the node has the style display=none, which unfortunately | ||||
|          * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none". | ||||
|          * | ||||
|          * Might be a good idea to check for classes or other attributes like 'aria-hidden' | ||||
|          */ | ||||
| 
 | ||||
|         return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden'); | ||||
|     } | ||||
| 
 | ||||
|     public function isWhitespace() | ||||
|     { | ||||
|         return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) || | ||||
|             ($this->nodeType === XML_ELEMENT_NODE && $this->nodeName === 'br'); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * This is a hack that overcomes the issue of node shifting when scanning and removing nodes. | ||||
|      * | ||||
|      * In the JS version of getElementsByTagName, if you remove a node it will not appear during the | ||||
|      * foreach. This does not happen in PHP DOMDocument, because if you remove a node, it will still appear but as an | ||||
|      * orphan node and will give an exception if you try to do anything with it. | ||||
|      * | ||||
|      * Shifting also occurs when converting parent nodes (like a P to a DIV), which in that case the found nodes are | ||||
|      * removed from the foreach "pool" but the internal index of the foreach is not aware and skips over nodes that | ||||
|      * never looped over. (index is at position 5, 2 nodes are removed, next one should be node 3, but the foreach tries | ||||
|      * to access node 6) | ||||
|      * | ||||
|      * This function solves this by searching for the nodes on every loop and keeping track of the count differences. | ||||
|      * Because on every loop we call getElementsByTagName again, this could cause a performance impact and should be | ||||
|      * used only when the results of the search are going to be used to remove the nodes. | ||||
|      * | ||||
|      * @param string $tag | ||||
|      * | ||||
|      * @return \Generator | ||||
|      */ | ||||
|     public function shiftingAwareGetElementsByTagName($tag) | ||||
|     { | ||||
|         /** @var $nodes DOMNodeList */ | ||||
|         $nodes = $this->getElementsByTagName($tag); | ||||
|         $count = $nodes->length; | ||||
| 
 | ||||
|         for ($i = 0; $i < $count; $i = max(++$i, 0)) { | ||||
|             yield $nodes->item($i); | ||||
| 
 | ||||
|             // Search for all the nodes again
 | ||||
|             $nodes = $this->getElementsByTagName($tag); | ||||
| 
 | ||||
|             // Subtract the amount of nodes removed from the current index
 | ||||
|             $i -= $count - $nodes->length; | ||||
| 
 | ||||
|             // Subtract the amount of nodes removed from the current count
 | ||||
|             $count -= ($count - $nodes->length); | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										160
									
								
								vendor/andreskrey/Readability/Nodes/NodeUtility.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										160
									
								
								vendor/andreskrey/Readability/Nodes/NodeUtility.php
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,160 @@ | |||
| <?php | ||||
| 
 | ||||
| namespace andreskrey\Readability\Nodes; | ||||
| 
 | ||||
| use andreskrey\Readability\Nodes\DOM\DOMDocument; | ||||
| use andreskrey\Readability\Nodes\DOM\DOMElement; | ||||
| use andreskrey\Readability\Nodes\DOM\DOMNode; | ||||
| 
 | ||||
/**
 * Class NodeUtility.
 *
 * Static helpers for working with DOM nodes: the regular expressions used to classify nodes, plus functions
 * to rename, remove and traverse nodes.
 */
class NodeUtility
{
    /**
     * Collection of regexps to check the node usability.
     *
     * Every entry is a complete PCRE pattern (delimiters and modifiers included) ready to be passed to the
     * preg_* functions.
     *
     * @var array
     */
    public static $regexps = [
        'unlikelyCandidates' => '/-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
        'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
        'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
        'byline' => '/byline|author|dateline|writtenby|p-author/i',
        // No "g" modifier here: it is a JavaScript flag that PCRE rejects ("Unknown modifier 'g'"). Global
        // matching is selected in PHP by the function used (preg_replace / preg_match_all), not by the pattern.
        'replaceFonts' => '/<(\/?)font[^>]*>/i',
        'normalize' => '/\s{2,}/',
        'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
        'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
        'prevLink' => '/(prev|earl|old|new|<|«)/i',
        'whitespace' => '/^\s*$/',
        'hasContent' => '/\S$/',
        'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
        'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
        // \x{00A0} is the unicode version of &nbsp;
        'onlyWhitespace' => '/\x{00A0}|\s+/u'
    ];

    /**
     * Skips over whitespace siblings, starting at the given node.
     *
     * Imported from the Element class on league\html-to-markdown.
     *
     * The walk stops at the first node that is an element or that is not whitespace according to its
     * isWhitespace() method, so the result is not guaranteed to be an element: it can also be a non-blank
     * text node, or null when the siblings run out (or when $node itself is null).
     *
     * @param DOMNode|null $node
     *
     * @return DOMNode|null
     */
    public static function nextElement($node)
    {
        $next = $node;
        while ($next
            && $next->nodeType !== XML_ELEMENT_NODE
            && $next->isWhitespace()) {
            $next = $next->nextSibling;
        }

        return $next;
    }

    /**
     * Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
     * element with the new tag name and importing it to the main DOMDocument.
     *
     * The children of the original node are deep-copied into the new element, which then takes the place of
     * the original node in its parent.
     *
     * @param DOMNode $node Node to rename; it must currently have a parent node
     * @param string $value New tag name
     * @param bool $importAttributes Whether the attributes of the original node are copied to the new one
     *
     * @return DOMNode The replacement node, already attached to the document of the original node
     */
    public static function setNodeTag($node, $value, $importAttributes = true)
    {
        // Build the renamed element inside a scratch document.
        $new = new DOMDocument('1.0', 'utf-8');
        $new->appendChild($new->createElement($value));

        $children = $node->childNodes;
        /** @var $children \DOMNodeList $i */
        for ($i = 0; $i < $children->length; $i++) {
            // importNode copies the child, so the original list is not altered while it is being walked.
            $import = $new->importNode($children->item($i), true);
            $new->firstChild->appendChild($import);
        }

        if ($importAttributes) {
            // Import attributes from the original node.
            foreach ($node->attributes as $attribute) {
                $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue);
            }
        }

        // The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement.
        $import = $node->ownerDocument->importNode($new->firstChild, true);
        $node->parentNode->replaceChild($import, $node);

        return $import;
    }

    /**
     * Removes the current node and returns the next node to be parsed (sibling or a sibling of an ancestor).
     *
     * The next node is looked up before the removal, ignoring the children of $node since they go away with
     * it. A node that is already detached (no parent) is left alone instead of triggering an error.
     *
     * @param DOMNode $node
     *
     * @return DOMNode|null Null when there is nothing left to traverse
     */
    public static function removeAndGetNext($node)
    {
        $nextNode = self::getNextNode($node, true);
        self::removeNode($node);

        return $nextNode;
    }

    /**
     * Remove the selected node. Does nothing when the node has no parent.
     *
     * @param $node DOMElement
     *
     * @return void
     **/
    public static function removeNode($node)
    {
        $parent = $node->parentNode;
        if ($parent) {
            $parent->removeChild($node);
        }
    }

    /**
     * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
     * for parents.
     *
     * @param DOMNode $originalNode
     * @param bool $ignoreSelfAndKids
     *
     * @return DOMNode|null Null when the traversal has reached the end of the tree
     */
    public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
    {
        /*
         * Traverse the DOM from node to node, starting at the node passed in.
         * Pass true for the second parameter to indicate this node itself
         * (and its kids) are going away, and we want the next node over.
         *
         * Calling this in a loop will traverse the DOM depth-first.
         */

        // First check for kids if those aren't being ignored
        if (!$ignoreSelfAndKids && $originalNode->firstChild) {
            return $originalNode->firstChild;
        }

        // Then for siblings...
        if ($originalNode->nextSibling) {
            return $originalNode->nextSibling;
        }

        // And finally, move up the parent chain *and* find a sibling
        // (because this is depth-first traversal, we will have already
        // seen the parent nodes themselves).
        do {
            $originalNode = $originalNode->parentNode;
        } while ($originalNode && !$originalNode->nextSibling);

        return ($originalNode) ? $originalNode->nextSibling : $originalNode;
    }
}
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fmstrat
						Fmstrat