lithium\analysis\Parser

class

The parser class uses PHP's tokenizer to provide methods and tools for performing static analysis on PHP code.

Source

/**
 * The parser class uses PHP's tokenizer to provide methods and tools for performing static
 * analysis on PHP code.
 */
class Parser {

	/**
	 * Convenience method to get the token name of a PHP code string. If multiple tokens are
	 * present in the string, only the first is returned.
	 *
	 * @param string $string String of PHP code to get the token name of, i.e. `'=>'` or `'static'`.
	 * @param array $options Available options are:
	 *        - 'id': If `true`, the token id is returned instead of the token name. Defaults
	 *          to `false`.
	 * @return mixed The name (or id) of the first token, or `false` if `$string` is empty or
	 *         yields no tokens.
	 */
	public static function token($string, array $options = []) {
		$defaults = ['id' => false];
		$options += $defaults;

		// `'0'` is valid code (an integer literal) although `empty()` considers it empty.
		if (empty($string) && $string !== '0') {
			return false;
		}
		$tokens = static::tokenize($string);

		// Nothing may be left once `tokenize()` has removed the wrapping tags.
		if (empty($tokens)) {
			return false;
		}
		$first = reset($tokens);
		return $first[$options['id'] ? 'id' : 'name'];
	}

	/**
	 * Splits the provided `$code` into PHP language tokens.
	 *
	 * Each returned token is an array with the keys `'id'` (the tokenizer id, or `null` for
	 * single-character tokens), `'name'` (the result of `token_name()`, or the character itself
	 * for single-character tokens), `'content'` and `'line'` (the line the token starts on).
	 *
	 * @param string $code Source code to be tokenized.
	 * @param array $options Options consists of:
	 *        -'wrap': Boolean indicating whether or not to wrap the supplied
	 *          code in PHP tags. Defaults to `true`. When wrapping and no 'include' list is
	 *          given, the first and last remaining tokens are assumed to be the added tags
	 *          and are removed from the result.
	 *        -'ignore': An array containing PHP language tokens (names or ids) to ignore.
	 *        -'include': If supplied, an array of the only language tokens (names or ids)
	 *         to include in the output.
	 * @return array An array of tokens in the supplied source code.
	 */
	public static function tokenize($code, array $options = []) {
		$defaults = ['wrap' => true, 'ignore' => [], 'include' => []];
		$options += $defaults;
		$tokens = [];
		$line = 1;

		if ($options['wrap']) {
			$code = "<?php {$code}?>";
		}
		foreach (token_get_all($code) as $token) {
			// Single-character tokens are returned as plain strings without a line number,
			// so they are assigned the running line counter.
			if (!is_array($token)) {
				$token = [null, $token, $line];
			}
			list($id, $content, $start) = $token;
			$name = $id ? token_name($id) : $content;

			// Advance the counter before filtering: a skipped token (i.e. ignored multi-line
			// whitespace) must still move the line of the single-character tokens after it.
			$line = $start + count(preg_split('/\r\n|\r|\n/', $content)) - 1;

			if (!empty($options['include'])) {
				$included = in_array($name, $options['include'], true);
				$included = $included || ($id !== null && in_array($id, $options['include'], true));

				if (!$included) {
					continue;
				}
			}

			if (!empty($options['ignore'])) {
				$ignored = in_array($name, $options['ignore'], true);
				$ignored = $ignored || ($id !== null && in_array($id, $options['ignore'], true));

				if ($ignored) {
					continue;
				}
			}
			$tokens[] = ['id' => $id, 'name' => $name, 'content' => $content, 'line' => $start];
		}

		// Drop the opening and closing tags added by the 'wrap' option.
		if ($options['wrap'] && empty($options['include'])) {
			$tokens = array_slice($tokens, 1, count($tokens) - 2);
		}
		return $tokens;
	}

	/**
	 * Finds a pattern in a block of code.
	 *
	 * Both `$code` and `$pattern` are tokenized with the same `$options`; the pattern tokens
	 * are then matched against the code tokens in sequence. A `*` token in the pattern consumes
	 * code tokens until the pattern token following it is matched. Only tokens named in the
	 * 'capture' option are collected, and a result is recorded only for matches which
	 * collected at least one token.
	 *
	 * @param string $code
	 * @param string $pattern
	 * @param array $options The list of options to be used when parsing / matching `$code`:
	 *              - 'ignore': An array of token names to ignore while parsing, defaults to
	 *               `['T_WHITESPACE']`
	 *              - 'capture': An array of token names to collect from each match, defaults
	 *                to `[]` (nothing is collected)
	 *              - 'return': The token key (i.e. `'content'`) to return for each collected
	 *                token; if the key is not set on a token the whole token array is
	 *                returned, defaults to `true`
	 *              - 'lineBreaks': If true, collected tokens must appear on the same line of
	 *                code as the first token of the match, otherwise the tokens collected so
	 *                far are discarded, defaults to false
	 *              - 'startOfLine': If true, the pattern must match starting with the beginning of
	 *                the line of code to be matched, defaults to false
	 *              - 'all': Accepted with a default of `true`, but not read by this method
	 * @return array One entry per recorded match, each an array of the collected tokens.
	 */
	public static function find($code, $pattern, array $options = []) {
		$defaults = [
			'all' => true, 'capture' => [], 'ignore' => ['T_WHITESPACE'],
			'return' => true, 'lineBreaks' => false, 'startOfLine' => false
		];
		$options += $defaults;
		$results = [];
		$matches = [];
		$patternMatch = [];
		$ret = $options['return'];

		$tokens = new Collection(['data' => static::tokenize($code, $options)]);
		$pattern = new Collection(['data' => static::tokenize($pattern, $options)]);

		// Checks the line constraints for a token; returns `true` if the token may be collected.
		$breaks = function($token) use (&$tokens, &$matches, &$patternMatch, $options) {
			if (!$options['lineBreaks']) {
				return true;
			}
			if (empty($patternMatch) && !$options['startOfLine']) {
				return true;
			}

			// Compare against the previous code token when no match is in progress, otherwise
			// against the first token of the current match. Presumably `prev()` / `next()`
			// move the collection cursor back and forth, leaving it where it was.
			if (empty($patternMatch)) {
				$prev = $tokens->prev();
				$tokens->next();
			} else {
				$prev = reset($patternMatch);
			}

			if (empty($patternMatch) && $options['startOfLine']) {
				return ($token['line'] > $prev['line']);
			}
			return ($token['line'] === $prev['line']);
		};

		// Records a matched token. Passing `null` resets the match state. Returns `false` if the
		// token was rejected (and the state reset), `true` otherwise.
		$capture = function($token) use (&$matches, &$patternMatch, $tokens, $breaks, $options) {
			if ($token === null) {
				$matches = $patternMatch = [];
				return false;
			}

			// With 'startOfLine', a match may not begin on the same line as the previous token.
			if (empty($patternMatch)) {
				$prev = $tokens->prev();
				$tokens->next();
				if ($options['startOfLine'] && $token['line'] === $prev['line']) {
					$patternMatch = $matches = [];
					return false;
				}
			}
			$patternMatch[] = $token;

			// Tokens not named in 'capture' are part of the match, but are not collected.
			if (empty($options['capture']) || !in_array($token['name'], $options['capture'])) {
				return true;
			}
			if (!$breaks($token)) {
				$matches = [];
				return true;
			}
			$matches[] = $token;
			return true;
		};

		// Handlers for special pattern tokens, keyed by token name.
		$executors = [
			// Wildcard: passes code tokens to `$capture` until one matches the pattern token
			// which follows the `*`.
			'*' => function(&$tokens, &$pattern) use ($options, $capture) {
				$closing = $pattern->next();
				$tokens->prev();

				while (($t = $tokens->next()) && !Parser::matchToken($closing, $t)) {
					$capture($t);
				}
				$pattern->next();
			}
		];

		$tokens->rewind();
		$pattern->rewind();

		// NOTE(review): completed matches are only recorded at the top of an iteration, so a
		// match which completes on the very last code token appears to be dropped - confirm.
		while ($tokens->valid()) {
			// The whole pattern has been matched: record the collected tokens and start over.
			if (!$pattern->valid()) {
				$pattern->rewind();

				if (!empty($matches)) {
					$results[] = array_map(
						function($i) use ($ret) { return isset($i[$ret]) ? $i[$ret] : $i; },
						$matches
					);
				}
				$capture(null);
			}

			$p = $pattern->current();
			$t = $tokens->current();

			switch (true) {
				case (static::matchToken($p, $t)):
					$capture($t) ? $pattern->next() : $pattern->rewind();
				break;
				case (isset($executors[$p['name']])):
					$exec = $executors[$p['name']];
					$exec($tokens, $pattern);
				break;
				default:
					$capture(null);
					$pattern->rewind();
				break;
			}
			$tokens->next();
		}
		return $results;
	}

	/**
	 * Token pattern matching.
	 *
	 * Returns every token whose name is a key of the normalized `$parameters` and which
	 * satisfies the optional `'before'` / `'after'` constraints given for that name. If
	 * `$parameters` contains the value `'*'`, tokens without an entry of their own are
	 * returned as well.
	 *
	 * @param string|array $code Source code to be analyzed, or an array of tokens as returned
	 *        by `tokenize()`.
	 * @param array|string $parameters An array containing token patterns to be matched. An entry
	 *        may define `'before'` and/or `'after'`, listing the token names allowed directly
	 *        before / after a matching token. The constraint is not checked where there is no
	 *        previous / next token.
	 * @param array $options The list of options to be used when matching `$code`:
	 *              - 'ignore': An array of language tokens to ignore.
	 *              - 'return': If set to 'content' returns an array of matching tokens' content;
	 *                generally, the token key to return. If the key is not set on a token the
	 *                whole token array is returned.
	 * @return array Array of matching tokens.
	 */
	public static function match($code, $parameters, array $options = []) {
		$defaults = ['ignore' => ['T_WHITESPACE'], 'return' => true];
		$options += $defaults;
		$parameters = static::_prepareMatchParams($parameters);

		$tokens = is_array($code) ? $code : static::tokenize($code, $options);
		$wildcard = in_array('*', $parameters, true);
		$total = count($tokens);
		$results = [];

		foreach ($tokens as $i => $token) {
			$known = array_key_exists($token['name'], $parameters);

			if (!$known && !$wildcard) {
				continue;
			}
			// Tokens accepted through the wildcard have no entry, and thus no constraints.
			$param = $known ? $parameters[$token['name']] : null;

			if (isset($param['before']) && $i > 0) {
				if (!in_array($tokens[$i - 1]['name'], (array) $param['before'], true)) {
					continue;
				}
			}

			if (isset($param['after']) && $i + 1 < $total) {
				if (!in_array($tokens[$i + 1]['name'], (array) $param['after'], true)) {
					continue;
				}
			}
			$results[] = isset($token[$options['return']]) ? $token[$options['return']] : $token;
		}
		return $results;
	}

	/**
	 * Compares two PHP language tokens.
	 *
	 * The tokens match if their names are identical and either the pattern has no content, the
	 * pattern content is `_` (matches any content) or both contents are identical. For
	 * `T_VARIABLE` tokens the first character (the `$`) of both contents is left out of the
	 * comparison, so the pattern `$_` matches any variable.
	 *
	 * @param array $pattern Pattern token.
	 * @param array $token Token to be compared.
	 * @return boolean Match result.
	 */
	public static function matchToken($pattern, $token) {
		if ($pattern['name'] !== $token['name']) {
			return false;
		}

		if (!isset($pattern['content'])) {
			return true;
		}

		$match = $pattern['content'];
		$content = $token['content'];

		if ($pattern['name'] === 'T_VARIABLE') {
			$match = substr($match, 1);
			$content = substr($content, 1);
		}
		return ($match === '_' || $match === $content);
	}

	/**
	 * Helper function to normalize parameters for token matching.
	 *
	 * Keys which are not already token names (not starting with `T_`) are replaced by the name
	 * of their first token, as returned by `token()`, and the same conversion is applied to all
	 * `'before'` / `'after'` entries. Every normalized name is registered as a key, so that names
	 * given as plain list values can be looked up by `match()`. Original list values (i.e. the
	 * `'*'` wildcard) are kept as they are.
	 *
	 * NOTE(review): relies on `Set::normalize()` turning list values (and presumably a
	 * comma-separated string) into `name => scope` pairs - confirm against its implementation.
	 *
	 * @see lithium\analysis\Parser::match()
	 * @param array|string $parameters Params to be normalized.
	 * @return array Normalized parameters.
	 */
	protected static function _prepareMatchParams($parameters) {
		$normalized = Set::normalize($parameters);
		$parameters = (array) $parameters;

		foreach ($normalized as $token => $scope) {
			foreach (['before', 'after'] as $key) {
				if (!isset($scope[$key])) {
					continue;
				}
				$items = [];

				foreach ((array) $scope[$key] as $item) {
					$items[] = (strpos($item, 'T_') !== 0) ? static::token($item) : $item;
				}
				$scope[$key] = $items;
			}

			if (strpos($token, 'T_') !== 0) {
				unset($parameters[$token]);
				$token = static::token($token);
			}
			$parameters[$token] = $scope;
		}
		return $parameters;
	}
}