Skip to content

prec(-1,...) breaks "word" tokens #3609

Open
@RedCMD

Description

Problem

When prec(-1, ...) is added to the "word" token rule ($.other),
Tree-sitter favors numbers over letters (the keyword "1true@" is matched as boolean, but "false@" is matched as other),
even though both the grammar and the docs state that there is no discrimination between the two:
https://tree-sitter.github.io/tree-sitter/creating-parsers#keyword-extraction

image

parse -d
new_parse
process version:0, version_count:1, state:1, row:0, col:0
lex_internal state:5, row:0, column:0
  consume character:'"'
lexed_lookahead sym:", size:1
shift state:4
process version:0, version_count:1, state:4, row:0, col:1
lex_internal state:3, row:0, column:1
  consume character:'1'
  consume character:'t'
  consume character:'r'
  consume character:'u'
  consume character:'e'
  consume character:'@'
lexed_lookahead sym:1true@, size:6
shift state:7
process version:0, version_count:1, state:7, row:0, col:7
lex_internal state:2, row:0, column:7
  skip character:' '
  consume character:'"'
lexed_lookahead sym:", size:2
reduce sym:boolean, child_count:1
shift state:5
process version:0, version_count:1, state:5, row:0, col:9
lex_internal state:5, row:0, column:9
  consume character:13
  consume character:10
lexed_lookahead sym:_whitespace, size:2
reduce sym:string, child_count:3
shift state:3
process version:0, version_count:1, state:3, row:1, col:0
lex_internal state:5, row:1, column:0
  consume character:'"'
lexed_lookahead sym:", size:1
reduce sym:root_repeat1, child_count:2
shift state:4
process version:0, version_count:1, state:4, row:1, col:1
lex_internal state:3, row:1, column:1
  consume character:'f'
  consume character:'a'
  consume character:'l'
  consume character:'s'
  consume character:'e'
  consume character:'@'
  consume character:' '
  consume character:'f'
  consume character:'a'
  consume character:'l'
  consume character:'s'
  consume character:'e'
  consume character:'@'
lexed_lookahead sym:other, size:7
shift state:8
process version:0, version_count:1, state:8, row:1, col:8
lex_internal state:2, row:1, column:8
  consume character:'"'
lexed_lookahead sym:", size:1
shift state:5
process version:0, version_count:1, state:5, row:1, col:9
lex_internal state:5, row:1, column:9
lexed_lookahead sym:end, size:0
reduce sym:string, child_count:3
reduce sym:root_repeat1, child_count:2
reduce sym:root, child_count:1
accept
done
(root [0, 0] - [1, 9]
  (string [0, 0] - [0, 9]
    (boolean [0, 1] - [0, 7]))
  (string [1, 0] - [1, 9]
    (other [1, 1] - [1, 8])))

Steps to reproduce

Generate grammar:

grammar.js
// Minimal tree-sitter grammar reproducing the report: the `other` rule is both
// the grammar's `word` token and wrapped in `prec(-1, ...)`.
// Reported result for the sample input: "1true@ " yields a `boolean` node,
// while "false@ " yields an `other` node.
module.exports = grammar({
	name: "prec",
	// Designates `other` as the word token (see the keyword-extraction docs
	// linked in the report).
	word: $ => $.other,
	rules: {
		// Entry rule: any number of whitespace runs and quoted strings, in any order.
		root: $ => repeat(
			choice(
				$._whitespace,
				$.string,
			),
		),

		// One or more whitespace characters.
		_whitespace: $ => /\s+/,

		// A double-quoted string whose content is either a `boolean` keyword
		// or an `other` token.
		string: $ => seq(
			'"',
			choice(
				$.boolean,
				$.other,
			),
			'"',
		),

		// The two literal keywords under test: one begins with a digit,
		// the other with a letter.
		boolean: $ => choice(
			"1true@",
			"false@",
		),
		// A single token with precedence -1, made of one or more repetitions of:
		//   - a backslash followed by one character that is not CR, LF, or tab, or
		//   - a run of characters that are not backslash, CR, LF, tab, or `"`.
		// Spaces are allowed, so `1true@ ` and `false@ ` can each lex as one `other`.
		other: $ => token(
			prec(-1,
				repeat1(
					choice(
						/\\[^\r\n\t]/,
						/[^\\\r\n\t"]+/,
					),
				),
			),
		),
	},
});

parse file:

"1true@ "
"false@ "

Expected behavior

Either:
both nodes are parsed as type other,
OR
both nodes are parsed as type boolean AND type _whitespace.

parse -d
new_parse
process version:0, version_count:1, state:1, row:0, col:0
lex_internal state:4, row:0, column:0
  consume character:'"'
lexed_lookahead sym:", size:1
shift state:4
process version:0, version_count:1, state:4, row:0, col:1
lex_internal state:2, row:0, column:1
  consume character:'1'
  consume character:'t'
  consume character:'r'
  consume character:'u'
  consume character:'e'
  consume character:'@'
  consume character:' '
lexed_lookahead sym:other, size:7
shift state:8
process version:0, version_count:1, state:8, row:0, col:8
lex_internal state:1, row:0, column:8
  consume character:'"'
lexed_lookahead sym:", size:1
shift state:5
process version:0, version_count:1, state:5, row:0, col:9
lex_internal state:4, row:0, column:9
  consume character:13
  consume character:10
lexed_lookahead sym:_whitespace, size:2
reduce sym:string, child_count:3
shift state:3
process version:0, version_count:1, state:3, row:1, col:0
lex_internal state:4, row:1, column:0
  consume character:'"'
lexed_lookahead sym:", size:1
reduce sym:root_repeat1, child_count:2
shift state:4
process version:0, version_count:1, state:4, row:1, col:1
lex_internal state:2, row:1, column:1
  consume character:'f'
  consume character:'a'
  consume character:'l'
  consume character:'s'
  consume character:'e'
  consume character:'@'
  consume character:' '
  consume character:'f'
  consume character:'a'
  consume character:'l'
  consume character:'s'
  consume character:'e'
  consume character:'@'
lexed_lookahead sym:other, size:7
shift state:8
process version:0, version_count:1, state:8, row:1, col:8
lex_internal state:1, row:1, column:8
  consume character:'"'
lexed_lookahead sym:", size:1
shift state:5
process version:0, version_count:1, state:5, row:1, col:9
lex_internal state:4, row:1, column:9
lexed_lookahead sym:end, size:0
reduce sym:string, child_count:3
reduce sym:root_repeat1, child_count:2
reduce sym:root, child_count:1
accept
done
(root [0, 0] - [1, 9]
  (string [0, 0] - [0, 9]
    (other [0, 1] - [0, 8]))
  (string [1, 0] - [1, 9]
    (other [1, 1] - [1, 8])))

Tree-sitter version (tree-sitter --version)

web-tree-sitter 0.23.0

Operating system/version

Windows 11

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    bug, parser (Related to parsing)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions