Skip to content

Commit fd6a02d

Browse files
Allow leading underscore in type labels (#435)
## Product change and motivation

We relax our identifier rules to allow `_` as the first character of a type label. This does NOT extend to variables: anonymous variables already add complexity there, and we do not want to modify that behaviour for the time being.

## Implementation

Update the pest grammar rule `IDENTIFIER_LABEL_H` and the regex behind `is_valid_identifier` (renamed to `is_valid_label` in this commit) to accept `_` as a valid first character for type labels. Variable identifiers are intentionally unchanged. We also simplify the identifier validation functions so that they match the updated grammar, using `XID_START` and `XID_CONTINUE`.
1 parent 05a9e73 commit fd6a02d

3 files changed

Lines changed: 47 additions & 30 deletions

File tree

dependencies/typedb/repositories.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ def typedb_behaviour():
1515
git_repository(
1616
name = "typedb_behaviour",
1717
remote = "https://github.com/typedb/typedb-behaviour",
18-
commit = "cf13e741844025a8a0210f25dc295ce966e26d16", # sync-marker: do not remove this comment, this is used for sync-dependencies by @typedb_behaviour
18+
commit = "88c1340f105821d06ec8bf5efcb39dd67eae0d24", # sync-marker: do not remove this comment, this is used for sync-dependencies by @typedb_behaviour
1919
)

rust/common/identifier.rs

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
use std::{fmt, sync::OnceLock};
88

9-
use regex::{Regex, RegexBuilder};
9+
use regex::Regex;
1010

1111
use crate::{
1212
common::{error::TypeQLError, Span, Spanned},
@@ -64,42 +64,59 @@ impl From<String> for Identifier {
6464
}
6565
}
6666

67-
const IDENTIFIER_CHAR: &str = "A-Za-z\
68-
\\u00C0-\\u00D6\
69-
\\u00D8-\\u00F6\
70-
\\u00F8-\\u02FF\
71-
\\u0370-\\u037D\
72-
\\u037F-\\u1FFF\
73-
\\u200C-\\u200D\
74-
\\u2070-\\u218F\
75-
\\u2C00-\\u2FEF\
76-
\\u3001-\\uD7FF\
77-
\\uF900-\\uFDCF\
78-
\\uFDF0-\\uFFFD";
79-
const IDENTIFIER_CONNECTOR: &str = "_\
80-
\\-\
81-
\\u00B7\
82-
\\u0300-\\u036F\
83-
\\u203F-\\u2040";
84-
const IDENTIFIER_DIGIT: &str = "0-9";
85-
86-
pub fn is_valid_identifier(identifier: &str) -> bool {
67+
const UNDERSCORE: &str = "_";
68+
const HYPHEN: &str = r"\-";
69+
const ASCII_DIGIT: &str = "0-9";
70+
const XID_START: &str = r"\p{XID_Start}";
71+
const XID_CONTINUE: &str = r"\p{XID_Continue}";
72+
73+
pub fn is_valid_label(identifier: &str) -> bool {
8774
static REGEX: OnceLock<Regex> = OnceLock::new();
8875
let regex = REGEX.get_or_init(|| {
89-
let identifier_tail = format!("{}{}{}", IDENTIFIER_CHAR, IDENTIFIER_CONNECTOR, IDENTIFIER_DIGIT);
90-
let identifier_pattern = format!("^[{}][{}]*$", IDENTIFIER_CHAR, identifier_tail);
91-
RegexBuilder::new(&identifier_pattern).build().unwrap()
76+
let head_classes = format!("{UNDERSCORE}{XID_START}");
77+
let tail_classes = format!("{HYPHEN}{XID_CONTINUE}");
78+
Regex::new(&format!("^[{head_classes}][{tail_classes}]*$")).unwrap()
9279
});
9380
regex.is_match(identifier)
9481
}
9582

9683
pub fn is_valid_var_identifier(identifier: &str) -> bool {
9784
static REGEX: OnceLock<Regex> = OnceLock::new();
9885
let regex = REGEX.get_or_init(|| {
99-
let identifier_head = format!("{}{}", IDENTIFIER_CHAR, IDENTIFIER_DIGIT);
100-
let identifier_tail = format!("{}{}{}", IDENTIFIER_CHAR, IDENTIFIER_DIGIT, IDENTIFIER_CONNECTOR);
101-
let identifier_pattern = format!("^[{}][{}]*$", identifier_head, identifier_tail);
102-
RegexBuilder::new(&identifier_pattern).build().unwrap()
86+
let head_classes = format!("{XID_START}{ASCII_DIGIT}");
87+
let tail_classes = format!("{HYPHEN}{XID_CONTINUE}");
88+
Regex::new(&format!("^[{head_classes}][{tail_classes}]*$")).unwrap()
10389
});
10490
regex.is_match(identifier)
10591
}
92+
93+
#[cfg(test)]
94+
mod tests {
95+
use super::*;
96+
97+
#[test]
98+
fn test_is_valid_label() {
99+
assert!(is_valid_label("person"));
100+
assert!(is_valid_label("_private"));
101+
assert!(is_valid_label("_leading-underscore"));
102+
assert!(is_valid_label("type-with-hyphens"));
103+
assert!(is_valid_label("name123"));
104+
assert!(is_valid_label("café"));
105+
assert!(!is_valid_label("0starts-with-digit"));
106+
assert!(!is_valid_label("-starts-with-hyphen"));
107+
assert!(!is_valid_label(""));
108+
assert!(!is_valid_label("has space"));
109+
}
110+
111+
#[test]
112+
fn test_is_valid_var_identifier() {
113+
assert!(is_valid_var_identifier("person"));
114+
assert!(is_valid_var_identifier("0starts-with-digit"));
115+
assert!(is_valid_var_identifier("name123"));
116+
assert!(is_valid_var_identifier("café"));
117+
assert!(!is_valid_var_identifier("_leading-underscore"));
118+
assert!(!is_valid_var_identifier("-starts-with-hyphen"));
119+
assert!(!is_valid_var_identifier(""));
120+
assert!(!is_valid_var_identifier("has space"));
121+
}
122+
}

rust/parser/typeql.pest

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ ARROW = _{ "->" }
594594
IDENTIFIER_START = @{ XID_START }
595595
IDENTIFIER_CONTINUE = @{ "-" | XID_CONTINUE }
596596

597-
IDENTIFIER_LABEL_H = @{ IDENTIFIER_START }
597+
IDENTIFIER_LABEL_H = @{ "_" | IDENTIFIER_START }
598598
IDENTIFIER_LABEL_T = @{ IDENTIFIER_CONTINUE }
599599

600600
IDENTIFIER_VAR_H = @{ IDENTIFIER_START | ASCII_DIGIT }

0 commit comments

Comments
 (0)