); return; } if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) { $this->sought_tag_name = $query['tag_name']; } if ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) { $this->sought_class_name = $query['class_name']; } if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) { $this->sought_match_offset = $query['match_offset']; } if ( isset( $query['tag_closers'] ) ) { $this->stop_on_tag_closers = 'visit' === $query['tag_closers']; } } /** * Checks whether a given tag and its attributes match the search criteria. * * @since 6.2.0 * * @return bool Whether the given tag and its attribute match the search criteria. */ private function matches() { if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) { return false; } // Does the tag name match the requested tag name in a case-insensitive manner? if ( null !== $this->sought_tag_name ) { /* * String (byte) length lookup is fast. If they aren't the * same length then they can't be the same string values. */ if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) { return false; } /* * Check each character to determine if they are the same. * Defer calls to `strtoupper()` to avoid them when possible. * Calling `strcasecmp()` here tested slowed than comparing each * character, so unless benchmarks show otherwise, it should * not be used. * * It's expected that most of the time that this runs, a * lower-case tag name will be supplied and the input will * contain lower-case tag names, thus normally bypassing * the case comparison code. */ for ( $i = 0; $i < $this->tag_name_length; $i++ ) { $html_char = $this->html[ $this->tag_name_starts_at + $i ]; $tag_char = $this->sought_tag_name[ $i ]; if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { return false; } } } if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) { return false; } return true; } /** * Parser Ready State. * * Indicates that the parser is ready to run and waiting for a state transition. * It may not have started yet, or it may have just finished parsing a token and * is ready to find the next one. * * @since 6.5.0 * * @access private */ const STATE_READY = 'STATE_READY'; /** * Parser Complete State. * * Indicates that the parser has reached the end of the document and there is * nothing left to scan. It finished parsing the last token completely. * * @since 6.5.0 * * @access private */ const STATE_COMPLETE = 'STATE_COMPLETE'; /** * Parser Incomplete Input State. * * Indicates that the parser has reached the end of the document before finishing * a token. It started parsing a token but there is a possibility that the input * HTML document was truncated in the middle of a token. * * The parser is reset at the start of the incomplete token and has paused. There * is nothing more than can be scanned unless provided a more complete document. * * @since 6.5.0 * * @access private */ const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT'; /** * Parser Matched Tag State. * * Indicates that the parser has found an HTML tag and it's possible to get * the tag name and read or modify its attributes (if it's not a closing tag). * * @since 6.5.0 * * @access private */ const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG'; /** * Parser Text Node State. * * Indicates that the parser has found a text node and it's possible * to read and modify that text. * * @since 6.5.0 * * @access private */ const STATE_TEXT_NODE = 'STATE_TEXT_NODE'; /** * Parser CDATA Node State. * * Indicates that the parser has found a CDATA node and it's possible * to read and modify its modifiable text. Note that in HTML there are * no CDATA nodes outside of foreign content (SVG and MathML). Outside * of foreign content, they are treated as HTML comments. * * @since 6.5.0 * * @access private */ const STATE_CDATA_NODE = 'STATE_CDATA_NODE'; /** * Indicates that the parser has found an HTML comment and it's * possible to read and modify its modifiable text. * * @since 6.5.0 * * @access private */ const STATE_COMMENT = 'STATE_COMMENT'; /** * Indicates that the parser has found a DOCTYPE node and it's * possible to read and modify its modifiable text. * * @since 6.5.0 * * @access private */ const STATE_DOCTYPE = 'STATE_DOCTYPE'; /** * Indicates that the parser has found an empty tag closer ``. * * Note that in HTML there are no empty tag closers, and they * are ignored. Nonetheless, the Tag Processor still * recognizes them as they appear in the HTML stream. * * These were historically discussed as a "presumptuous tag * closer," which would close the nearest open tag, but were * dismissed in favor of explicitly-closing tags. * * @since 6.5.0 * * @access private */ const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG'; /** * Indicates that the parser has found a "funky comment" * and it's possible to read and modify its modifiable text. * * Example: * * * * * * Funky comments are tag closers with invalid tag names. Note * that in HTML these are turn into bogus comments. Nonetheless, * the Tag Processor recognizes them in a stream of HTML and * exposes them for inspection and modification. * * @since 6.5.0 * * @access private */ const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY'; /** * Indicates that a comment was created when encountering abruptly-closed HTML comment. * * Example: * * * * * @since 6.5.0 */ const COMMENT_AS_ABRUPTLY_CLOSED_COMMENT = 'COMMENT_AS_ABRUPTLY_CLOSED_COMMENT'; /** * Indicates that a comment would be parsed as a CDATA node, * were HTML to allow CDATA nodes outside of foreign content. * * Example: * * * * This is an HTML comment, but it looks like a CDATA node. * * @since 6.5.0 */ const COMMENT_AS_CDATA_LOOKALIKE = 'COMMENT_AS_CDATA_LOOKALIKE'; /** * Indicates that a comment was created when encountering * normative HTML comment syntax. * * Example: * * * * @since 6.5.0 */ const COMMENT_AS_HTML_COMMENT = 'COMMENT_AS_HTML_COMMENT'; /** * Indicates that a comment would be parsed as a Processing * Instruction node, were they to exist within HTML. * * Example: * * * * This is an HTML comment, but it looks like a CDATA node. * * @since 6.5.0 */ const COMMENT_AS_PI_NODE_LOOKALIKE = 'COMMENT_AS_PI_NODE_LOOKALIKE'; /** * Indicates that a comment was created when encountering invalid * HTML input, a so-called "bogus comment." * * Example: * * * * * @since 6.5.0 */ const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML'; }