|
6 | 6 |
|
7 | 7 | use std::{fmt, sync::OnceLock}; |
8 | 8 |
|
9 | | -use regex::{Regex, RegexBuilder}; |
| 9 | +use regex::Regex; |
10 | 10 |
|
11 | 11 | use crate::{ |
12 | 12 | common::{error::TypeQLError, Span, Spanned}, |
@@ -64,42 +64,59 @@ impl From<String> for Identifier { |
64 | 64 | } |
65 | 65 | } |
66 | 66 |
|
67 | | -const IDENTIFIER_CHAR: &str = "A-Za-z\ |
68 | | - \\u00C0-\\u00D6\ |
69 | | - \\u00D8-\\u00F6\ |
70 | | - \\u00F8-\\u02FF\ |
71 | | - \\u0370-\\u037D\ |
72 | | - \\u037F-\\u1FFF\ |
73 | | - \\u200C-\\u200D\ |
74 | | - \\u2070-\\u218F\ |
75 | | - \\u2C00-\\u2FEF\ |
76 | | - \\u3001-\\uD7FF\ |
77 | | - \\uF900-\\uFDCF\ |
78 | | - \\uFDF0-\\uFFFD"; |
79 | | -const IDENTIFIER_CONNECTOR: &str = "_\ |
80 | | - \\-\ |
81 | | - \\u00B7\ |
82 | | - \\u0300-\\u036F\ |
83 | | - \\u203F-\\u2040"; |
84 | | -const IDENTIFIER_DIGIT: &str = "0-9"; |
85 | | - |
86 | | -pub fn is_valid_identifier(identifier: &str) -> bool { |
// Fragments below are spliced between `[` and `]` when the identifier regexes are assembled,
// so each one must be valid *inside* a regex character class.

/// Unicode `XID_Start` property class.
const XID_START: &str = r"\p{XID_Start}";
/// Unicode `XID_Continue` property class.
const XID_CONTINUE: &str = r"\p{XID_Continue}";
/// ASCII decimal digit range.
const ASCII_DIGIT: &str = "0-9";
/// Literal underscore.
const UNDERSCORE: &str = "_";
/// Literal hyphen, escaped so it is never read as a range operator inside a class.
const HYPHEN: &str = r"\-";
| 72 | + |
| 73 | +pub fn is_valid_label(identifier: &str) -> bool { |
87 | 74 | static REGEX: OnceLock<Regex> = OnceLock::new(); |
88 | 75 | let regex = REGEX.get_or_init(|| { |
89 | | - let identifier_tail = format!("{}{}{}", IDENTIFIER_CHAR, IDENTIFIER_CONNECTOR, IDENTIFIER_DIGIT); |
90 | | - let identifier_pattern = format!("^[{}][{}]*$", IDENTIFIER_CHAR, identifier_tail); |
91 | | - RegexBuilder::new(&identifier_pattern).build().unwrap() |
| 76 | + let head_classes = format!("{UNDERSCORE}{XID_START}"); |
| 77 | + let tail_classes = format!("{HYPHEN}{XID_CONTINUE}"); |
| 78 | + Regex::new(&format!("^[{head_classes}][{tail_classes}]*$")).unwrap() |
92 | 79 | }); |
93 | 80 | regex.is_match(identifier) |
94 | 81 | } |
95 | 82 |
|
96 | 83 | pub fn is_valid_var_identifier(identifier: &str) -> bool { |
97 | 84 | static REGEX: OnceLock<Regex> = OnceLock::new(); |
98 | 85 | let regex = REGEX.get_or_init(|| { |
99 | | - let identifier_head = format!("{}{}", IDENTIFIER_CHAR, IDENTIFIER_DIGIT); |
100 | | - let identifier_tail = format!("{}{}{}", IDENTIFIER_CHAR, IDENTIFIER_DIGIT, IDENTIFIER_CONNECTOR); |
101 | | - let identifier_pattern = format!("^[{}][{}]*$", identifier_head, identifier_tail); |
102 | | - RegexBuilder::new(&identifier_pattern).build().unwrap() |
| 86 | + let head_classes = format!("{XID_START}{ASCII_DIGIT}"); |
| 87 | + let tail_classes = format!("{HYPHEN}{XID_CONTINUE}"); |
| 88 | + Regex::new(&format!("^[{head_classes}][{tail_classes}]*$")).unwrap() |
103 | 89 | }); |
104 | 90 | regex.is_match(identifier) |
105 | 91 | } |
| 92 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `validator` accepts every entry of `accepted` and rejects every entry of
    /// `rejected`, naming the offending input on failure.
    fn check_all(validator: fn(&str) -> bool, accepted: &[&str], rejected: &[&str]) {
        for &input in accepted {
            assert!(validator(input), "expected {input:?} to be accepted");
        }
        for &input in rejected {
            assert!(!validator(input), "expected {input:?} to be rejected");
        }
    }

    #[test]
    fn test_is_valid_label() {
        check_all(
            is_valid_label,
            &["person", "_private", "_leading-underscore", "type-with-hyphens", "name123", "café"],
            &["0starts-with-digit", "-starts-with-hyphen", "", "has space"],
        );
    }

    #[test]
    fn test_is_valid_var_identifier() {
        check_all(
            is_valid_var_identifier,
            &["person", "0starts-with-digit", "name123", "café"],
            &["_leading-underscore", "-starts-with-hyphen", "", "has space"],
        );
    }
}
0 commit comments