From f366549fa4486777d76e5de90745b0d8773c9884 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Wed, 14 Jan 2026 14:06:01 +0800 Subject: [PATCH] Support SAMPLE clause on subqueries (derived tables) Add support for SAMPLE clause on subqueries/derived tables. Previously, SAMPLE clause was only supported on table references, but Snowflake and other databases also support it on subqueries. Example: SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SELECT * FROM (SELECT * FROM mytable) AS t SAMPLE (50 PERCENT) SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SEED (42) --- src/ast/query.rs | 6 ++++++ src/ast/spans.rs | 1 + src/parser/mod.rs | 10 ++++++++++ tests/sqlparser_common.rs | 6 +++++- tests/sqlparser_snowflake.rs | 17 +++++++++++++++++ 5 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 16fc9ec0e..6e8907f20 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1242,6 +1242,8 @@ pub enum TableFactor { lateral: bool, subquery: Box, alias: Option, + /// Optional table sample modifier + sample: Option, }, /// `TABLE()[ AS ]` TableFunction { @@ -1922,6 +1924,7 @@ impl fmt::Display for TableFactor { lateral, subquery, alias, + sample, } => { if *lateral { write!(f, "LATERAL ")?; @@ -1934,6 +1937,9 @@ impl fmt::Display for TableFactor { if let Some(alias) = alias { write!(f, " {alias}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, " {sample}")?; + } Ok(()) } TableFactor::Function { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d4e843157..865e3ed24 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1881,6 +1881,7 @@ impl Spanned for TableFactor { lateral: _, subquery, alias, + sample: _, } => subquery .span() .union_opt(&alias.as_ref().map(|alias| alias.span())), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f07e8919a..dc39eb726 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14667,6 +14667,7 @@ impl<'a> Parser<'a> { pipe_operators: vec![], }), alias, + sample: None, }) } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keyword(Keyword::UNNEST) @@ -15467,6 +15468,14 @@ impl<'a> Parser<'a> { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; let alias = self.maybe_parse_table_alias()?; + + // Parse optional SAMPLE clause after alias + let sample = if let Some(parsed_sample) = self.maybe_parse_table_sample()? { + Some(TableSampleKind::AfterTableAlias(parsed_sample)) + } else { + None + }; + Ok(TableFactor::Derived { lateral: match lateral { Lateral => true, @@ -15474,6 +15483,7 @@ impl<'a> Parser<'a> { }, subquery, alias, + sample, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f549e4d0..81d8e6349 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -512,7 +512,8 @@ fn parse_update_set_from() { format_clause: None, pipe_operators: vec![], }), - alias: table_alias(true, "t2") + alias: table_alias(true, "t2"), + sample: None, }, joins: vec![] }])), @@ -7792,6 +7793,7 @@ fn parse_derived_tables() { lateral: false, subquery: Box::new(verified_query("(SELECT 1) UNION (SELECT 2)")), alias: table_alias(true, "t1"), + sample: None, }, joins: vec![Join { relation: table_from_name(ObjectName::from(vec!["t2".into()])), @@ -8800,6 +8802,7 @@ fn lateral_derived() { lateral, ref subquery, alias: Some(ref alias), + sample: _, } = join.relation { assert_eq!(lateral_in, lateral); @@ -9878,6 +9881,7 @@ fn parse_merge() { pipe_operators: vec![], }), alias: table_alias(true, "stg"), + sample: None, } ); assert_eq!(source, source_no_into); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 37e9f8cb4..990b35bc2 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3515,6 +3515,23 @@ fn test_table_sample() { snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); } +#[test] +fn test_subquery_sample() { + // Test SAMPLE clause on subqueries (derived tables) + snowflake_and_generic().verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10000 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) AS t SAMPLE (50 PERCENT)"); + // Nested subquery with SAMPLE + snowflake_and_generic().verified_stmt( + "SELECT * FROM (SELECT * FROM (SELECT report_from FROM mytable) SAMPLE (10000 ROWS)) AS anon_1", + ); + // SAMPLE with SEED on subquery + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SEED (42)"); +} + #[test] fn parse_ls_and_rm() { snowflake().one_statement_parses_to("LS @~", "LIST @~");