Skip to content

Commit 9316210

Browse files
authored
Merge branch 'main' into ayman/syncUpstream
2 parents 0dbb9c9 + 0743d2d commit 9316210

File tree

14 files changed

+434
-34
lines changed

14 files changed

+434
-34
lines changed

.github/workflows/semgrep.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: Semgrep
2+
on:
3+
pull_request:
4+
workflow_dispatch:
5+
6+
jobs:
7+
semgrep:
8+
name: Run Semgrep
9+
runs-on: ubuntu-latest
10+
timeout-minutes: 30
11+
container:
12+
# A Docker image with Semgrep installed. Do not change this.
13+
image: returntocorp/semgrep
14+
if: (github.actor != 'dependabot[bot]')
15+
steps:
16+
- uses: actions/checkout@v4
17+
- run: semgrep ci
18+
env:
19+
SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN_PUBLIC }}

examples/cli.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ $ cargo run --example cli - [--dialectname]
4848

4949
let dialect: Box<dyn Dialect> = match std::env::args().nth(2).unwrap_or_default().as_ref() {
5050
"--ansi" => Box::new(AnsiDialect {}),
51+
"--databricks" => Box::new(DatabricksDialect {}),
5152
"--bigquery" => Box::new(BigQueryDialect {}),
5253
"--postgres" => Box::new(PostgreSqlDialect {}),
5354
"--ms" => Box::new(MsSqlDialect {}),

src/ast/mod.rs

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,16 @@ pub enum JsonPathElem {
600600
/// Accesses an object field or array element using bracket notation,
601601
/// e.g. `obj['foo']`.
602602
///
603+
/// Note that on Databricks this is *not* equivalent to dot notation: bracket
604+
/// notation is case-sensitive, whereas dot notation is case-insensitive.
605+
///
603606
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
604607
Bracket { key: Expr },
608+
/// Accesses all elements in the given (generally array) element. Used for
609+
/// constructs like `foo:bar[*].baz`.
610+
///
611+
/// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-json-path-expression#extract-values-from-arrays>
612+
AllElements,
605613
}
606614

607615
/// A JSON path.
@@ -612,17 +620,22 @@ pub enum JsonPathElem {
612620
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
613621
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
614622
pub struct JsonPath {
623+
/// True if the path should start with a colon. Some dialects (e.g. Snowflake) allow
624+
/// `a['b']`, whereas others (e.g. Databricks) require the colon even in this case
625+
/// (so `a:['b']`).
626+
pub has_colon: bool,
615627
pub path: Vec<JsonPathElem>,
616628
}
617629

618630
impl fmt::Display for JsonPath {
619631
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
632+
if self.has_colon {
633+
write!(f, ":")?;
634+
}
620635
for (i, elem) in self.path.iter().enumerate() {
621636
match elem {
622637
JsonPathElem::Dot { key, quoted } => {
623-
if i == 0 {
624-
write!(f, ":")?;
625-
} else {
638+
if i != 0 {
626639
write!(f, ".")?;
627640
}
628641

@@ -635,6 +648,9 @@ impl fmt::Display for JsonPath {
635648
JsonPathElem::Bracket { key } => {
636649
write!(f, "[{key}]")?;
637650
}
651+
JsonPathElem::AllElements => {
652+
write!(f, "[*]")?;
653+
}
638654
}
639655
}
640656
Ok(())
@@ -841,6 +857,13 @@ pub enum Expr {
841857
subquery: Box<Query>,
842858
negated: bool,
843859
},
860+
/// XXX not valid SQL syntax, this is a hack needed to support parameter substitution
861+
/// `[ NOT ] IN <in_expr>`
862+
InExpr {
863+
expr: Box<Expr>,
864+
in_expr: Box<Expr>,
865+
negated: bool,
866+
},
844867
/// `[ NOT ] IN UNNEST(array_expression)`
845868
InUnnest {
846869
expr: Box<Expr>,
@@ -1508,6 +1531,17 @@ impl fmt::Display for Expr {
15081531
if *negated { "NOT " } else { "" },
15091532
subquery
15101533
),
1534+
Expr::InExpr {
1535+
expr,
1536+
in_expr,
1537+
negated,
1538+
} => write!(
1539+
f,
1540+
"{} {}IN {}",
1541+
expr,
1542+
if *negated { "NOT " } else { "" },
1543+
in_expr,
1544+
),
15111545
Expr::InUnnest {
15121546
expr,
15131547
array_expr,

src/ast/query.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,12 @@ pub enum TableFactor {
12431243
subquery: Box<Query>,
12441244
alias: Option<TableAlias>,
12451245
},
1246+
/// A pass-through query string that is not parsed.
1247+
/// This is useful when building or rewriting queries from a SQL string that is already known to be valid, so that it does not need to be parsed.
1248+
PassThroughQuery {
1249+
query: String,
1250+
alias: Option<TableAlias>,
1251+
},
12461252
/// `TABLE(<expr>)[ AS <alias> ]`
12471253
TableFunction {
12481254
expr: Expr,
@@ -1936,6 +1942,13 @@ impl fmt::Display for TableFactor {
19361942
}
19371943
Ok(())
19381944
}
1945+
TableFactor::PassThroughQuery { query, alias } => {
1946+
write!(f, "({query})")?;
1947+
if let Some(alias) = alias {
1948+
write!(f, " AS {alias}")?;
1949+
}
1950+
Ok(())
1951+
}
19391952
TableFactor::Function {
19401953
lateral,
19411954
name,

src/ast/spans.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,11 @@ impl Spanned for Expr {
14151415
array_expr,
14161416
negated: _,
14171417
} => expr.span().union(&array_expr.span()),
1418+
Expr::InExpr {
1419+
expr,
1420+
in_expr,
1421+
negated: _,
1422+
} => expr.span().union(&in_expr.span()),
14181423
Expr::Between {
14191424
expr,
14201425
negated: _,
@@ -1722,7 +1727,7 @@ impl Spanned for FunctionArgumentClause {
17221727
/// see Spanned impl for JsonPathElem for more information
17231728
impl Spanned for JsonPath {
17241729
fn span(&self) -> Span {
1725-
let JsonPath { path } = self;
1730+
let JsonPath { path, has_colon: _ } = self;
17261731

17271732
union_spans(path.iter().map(|i| i.span()))
17281733
}
@@ -1732,11 +1737,13 @@ impl Spanned for JsonPath {
17321737
///
17331738
/// Missing spans:
17341739
/// - [JsonPathElem::Dot]
1740+
/// - [JsonPathElem::AllElements]
17351741
impl Spanned for JsonPathElem {
17361742
fn span(&self) -> Span {
17371743
match self {
17381744
JsonPathElem::Dot { .. } => Span::empty(),
17391745
JsonPathElem::Bracket { key } => key.span(),
1746+
JsonPathElem::AllElements => Span::empty(),
17401747
}
17411748
}
17421749
}
@@ -1883,6 +1890,8 @@ impl Spanned for TableFactor {
18831890
} => subquery
18841891
.span()
18851892
.union_opt(&alias.as_ref().map(|alias| alias.span())),
1893+
// This is usually created at runtime, so we don't have a span for it
1894+
TableFactor::PassThroughQuery { query: _, alias: _ } => Span::empty(),
18861895
TableFactor::TableFunction { expr, alias } => expr
18871896
.span()
18881897
.union_opt(&alias.as_ref().map(|alias| alias.span())),

src/dialect/databricks.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::dialect::Dialect;
18+
use crate::dialect::{Dialect, Precedence};
19+
use crate::parser::{Parser, ParserError};
20+
use crate::tokenizer::Token;
1921

2022
/// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/)
2123
///
@@ -38,6 +40,19 @@ impl Dialect for DatabricksDialect {
3840
matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
3941
}
4042

43+
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
44+
let token = parser.peek_token();
45+
// : is used for JSON path access
46+
match token.token {
47+
Token::Colon => Some(Ok(self.prec_value(Precedence::Period))),
48+
_ => None,
49+
}
50+
}
51+
52+
fn supports_semi_structured_array_all_elements(&self) -> bool {
53+
true
54+
}
55+
4156
fn supports_filter_during_aggregation(&self) -> bool {
4257
true
4358
}
@@ -70,8 +85,14 @@ impl Dialect for DatabricksDialect {
7085
true
7186
}
7287

88+
// https://docs.databricks.com/aws/en/sql/language-manual/data-types/string-type#literals
89+
fn supports_string_literal_backslash_escape(&self) -> bool {
90+
true
91+
}
92+
7393
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html>
7494
fn supports_group_by_with_modifier(&self) -> bool {
75-
true
95+
true
7696
}
97+
7798
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,11 @@ pub trait Dialect: Debug + Any {
970970
false
971971
}
972972

973+
/// Returns true if the dialect supports writing `[*]` to select all elements in a JSON array.
974+
fn supports_semi_structured_array_all_elements(&self) -> bool {
975+
false
976+
}
977+
973978
/// Returns true if the specified keyword is reserved and cannot be
974979
/// used as an identifier without special handling like quoting.
975980
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@
153153
// Splitting complex nodes (expressions, statements, types) into separate types
154154
// would bloat the API and hide intent. Extra memory is a worthwhile tradeoff.
155155
#![allow(clippy::large_enum_variant)]
156+
// TODO: Fix the `clippy::unnecessary_unwrap` violations and remove this expectation.
157+
#![expect(clippy::unnecessary_unwrap)]
156158

157159
// Allow proc-macros to find this crate
158160
extern crate self as sqlparser;

src/parser/mod.rs

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3742,7 +3742,8 @@ impl<'a> Parser<'a> {
37423742
expr: Box::new(expr),
37433743
})
37443744
} else if Token::LBracket == *tok && self.dialect.supports_partiql()
3745-
|| (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)
3745+
|| (dialect_of!(self is SnowflakeDialect | GenericDialect | DatabricksDialect)
3746+
&& Token::Colon == *tok)
37463747
{
37473748
self.prev_token();
37483749
self.parse_json_access(expr)
@@ -3889,21 +3890,26 @@ impl<'a> Parser<'a> {
38893890
})
38903891
}
38913892

3893+
// Parser is either looking at a : or a bracket expression.
38923894
fn parse_json_path(&mut self) -> Result<JsonPath, ParserError> {
38933895
let mut path = Vec::new();
3896+
let mut has_colon = false;
38943897
loop {
38953898
match self.next_token().token {
38963899
Token::Colon if path.is_empty() => {
3897-
path.push(self.parse_json_path_object_key()?);
3900+
has_colon = true;
3901+
if *self.peek_token_ref() == Token::LBracket {
3902+
path.push(self.parse_json_path_bracket_element()?);
3903+
} else {
3904+
path.push(self.parse_json_path_object_key()?);
3905+
}
38983906
}
38993907
Token::Period if !path.is_empty() => {
39003908
path.push(self.parse_json_path_object_key()?);
39013909
}
39023910
Token::LBracket => {
3903-
let key = self.parse_expr()?;
3904-
self.expect_token(&Token::RBracket)?;
3905-
3906-
path.push(JsonPathElem::Bracket { key });
3911+
self.prev_token();
3912+
path.push(self.parse_json_path_bracket_element()?);
39073913
}
39083914
_ => {
39093915
self.prev_token();
@@ -3913,7 +3919,23 @@ impl<'a> Parser<'a> {
39133919
}
39143920

39153921
debug_assert!(!path.is_empty());
3916-
Ok(JsonPath { path })
3922+
Ok(JsonPath { has_colon, path })
3923+
}
3924+
3925+
/// Parses a single bracketed element in a JSON path expression, including both brackets.
3926+
fn parse_json_path_bracket_element(&mut self) -> Result<JsonPathElem, ParserError> {
3927+
self.expect_token(&Token::LBracket)?;
3928+
let elem = if *self.peek_token_ref() == Token::Mul
3929+
&& self.dialect.supports_semi_structured_array_all_elements()
3930+
{
3931+
self.expect_token(&Token::Mul)?;
3932+
JsonPathElem::AllElements
3933+
} else {
3934+
let key = self.parse_expr()?;
3935+
JsonPathElem::Bracket { key }
3936+
};
3937+
self.expect_token(&Token::RBracket)?;
3938+
Ok(elem)
39173939
}
39183940

39193941
/// Parses the parens following the `[ NOT ] IN` operator.
@@ -3930,25 +3952,34 @@ impl<'a> Parser<'a> {
39303952
negated,
39313953
});
39323954
}
3933-
self.expect_token(&Token::LParen)?;
3934-
let in_op = match self.maybe_parse(|p| p.parse_query())? {
3935-
Some(subquery) => Expr::InSubquery {
3936-
expr: Box::new(expr),
3937-
subquery,
3938-
negated,
3939-
},
3940-
None => Expr::InList {
3941-
expr: Box::new(expr),
3942-
list: if self.dialect.supports_in_empty_list() {
3943-
self.parse_comma_separated0(Parser::parse_expr, Token::RParen)?
3944-
} else {
3945-
self.parse_comma_separated(Parser::parse_expr)?
3955+
if self.consume_token(&Token::LParen) {
3956+
let in_op = match self.maybe_parse(|p| p.parse_query())? {
3957+
Some(subquery) => Expr::InSubquery {
3958+
expr: Box::new(expr),
3959+
subquery,
3960+
negated,
3961+
},
3962+
None => Expr::InList {
3963+
expr: Box::new(expr),
3964+
list: if self.dialect.supports_in_empty_list() {
3965+
self.parse_comma_separated0(Parser::parse_expr, Token::RParen)?
3966+
} else {
3967+
self.parse_comma_separated(Parser::parse_expr)?
3968+
},
3969+
negated,
39463970
},
3971+
};
3972+
self.expect_token(&Token::RParen)?;
3973+
Ok(in_op)
3974+
} else {
3975+
// parse an expr
3976+
let in_expr = self.parse_expr()?;
3977+
Ok(Expr::InExpr {
3978+
expr: Box::new(expr),
3979+
in_expr: Box::new(in_expr),
39473980
negated,
3948-
},
3949-
};
3950-
self.expect_token(&Token::RParen)?;
3951-
Ok(in_op)
3981+
})
3982+
}
39523983
}
39533984

39543985
/// Parses `BETWEEN <low> AND <high>`, assuming the `BETWEEN` keyword was already consumed.
@@ -14235,7 +14266,8 @@ impl<'a> Parser<'a> {
1423514266
| TableFactor::Unpivot { alias, .. }
1423614267
| TableFactor::MatchRecognize { alias, .. }
1423714268
| TableFactor::SemanticView { alias, .. }
14238-
| TableFactor::NestedJoin { alias, .. } => {
14269+
| TableFactor::NestedJoin { alias, .. }
14270+
| TableFactor::PassThroughQuery { alias, .. } => {
1423914271
// but not `FROM (mytable AS alias1) AS alias2`.
1424014272
if let Some(inner_alias) = alias {
1424114273
return Err(ParserError::ParserError(format!(

0 commit comments

Comments
 (0)