Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 51 additions & 51 deletions datafusion/core/tests/fuzz_cases/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use std::sync::{Arc, OnceLock};
use std::sync::{Arc, LazyLock};

use arrow::array::{Array, RecordBatch, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
Expand Down Expand Up @@ -204,65 +204,24 @@ impl Utf8Test {

/// up to 100 randomly chosen combinations of interesting characters, with lengths ranging from 1 to 3
fn values() -> &'static [String] {
VALUES.get_or_init(|| {
let mut rng = rand::thread_rng();

let characters = [
"z",
"0",
"~",
"ß",
"℣",
"%", // this one is useful for like/not like tests since it will result in randomly inserted wildcards
"_", // this one is useful for like/not like tests since it will result in randomly inserted wildcards
"\u{7F}",
"\u{7FF}",
"\u{FF}",
"\u{10FFFF}",
"\u{D7FF}",
"\u{FDCF}",
// null character
"\u{0}",
];
let value_lengths = [1, 2, 3];
let mut values = vec![];
for length in &value_lengths {
values.extend(
characters
.iter()
.cloned()
.combinations(*length)
// now get all permutations of each combination
.flat_map(|c| c.into_iter().permutations(*length))
// and join them into strings
.map(|c| c.join("")),
);
}
println!("Generated {} values", values.len());
// randomly pick 100 values
values.shuffle(&mut rng);
values.truncate(100);
values
})
&VALUES
}

/// return the in memory object store
fn memory_store() -> &'static Arc<dyn ObjectStore> {
MEMORY_STORE.get_or_init(|| Arc::new(InMemory::new()))
&MEMORY_STORE
}

/// return the schema of the created test files
fn schema() -> Arc<Schema> {
let schema = SCHEMA.get_or_init(|| {
Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]))
});
let schema = &SCHEMA;
Arc::clone(schema)
}

/// Return a list of test files with UTF8 data and combinations of
/// [`Self::values`]
async fn test_files() -> Vec<TestFile> {
let files_mutex = TESTFILES.get_or_init(|| Mutex::new(vec![]));
let files_mutex = &TESTFILES;
let mut files = files_mutex.lock().await;
if !files.is_empty() {
return (*files).clone();
Expand Down Expand Up @@ -385,16 +344,57 @@ async fn write_parquet_file(
}

/// The string values for [Utf8Test::values]
static VALUES: OnceLock<Vec<String>> = OnceLock::new();
static VALUES: LazyLock<Vec<String>> = LazyLock::new(|| {
    // Building blocks for the generated strings.
    let alphabet = [
        "z",
        "0",
        "~",
        "ß",
        "℣",
        // `%` and `_` are LIKE wildcards, so strings containing them end up
        // with randomly placed wildcards in like / not like tests
        "%",
        "_",
        "\u{7F}",
        "\u{7FF}",
        "\u{FF}",
        "\u{10FFFF}",
        "\u{D7FF}",
        "\u{FDCF}",
        // null character
        "\u{0}",
    ];

    let mut candidates = vec![];
    for len in [1, 2, 3] {
        // Choose `len` entries from the alphabet, then emit every ordering of
        // that choice, concatenated into a single string.
        let strings_of_len = alphabet
            .iter()
            .cloned()
            .combinations(len)
            .flat_map(|chosen| chosen.into_iter().permutations(len))
            .map(|ordered| ordered.join(""));
        candidates.extend(strings_of_len);
    }
    println!("Generated {} values", candidates.len());

    // Keep a random sample of 100 of the generated strings.
    let mut rng = rand::thread_rng();
    candidates.shuffle(&mut rng);
    candidates.truncate(100);
    candidates
});
/// The schema for the [Utf8Test::schema]
static SCHEMA: OnceLock<Arc<Schema>> = OnceLock::new();
// One `Utf8` field named "a"; the trailing `false` is presumably arrow's `nullable` flag
// (i.e. the column is declared non-nullable) -- confirm against `Field::new`.
static SCHEMA: LazyLock<Arc<Schema>> =
LazyLock::new(|| Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)])));

/// The InMemory object store
static MEMORY_STORE: OnceLock<Arc<dyn ObjectStore>> = OnceLock::new();
// Created on first access; `memory_store` hands out a reference to this single instance.
static MEMORY_STORE: LazyLock<Arc<dyn ObjectStore>> =
LazyLock::new(|| Arc::new(InMemory::new()));

/// List of in memory parquet files with UTF8 data
// Use a mutex rather than OnceLock to allow for async initialization
static TESTFILES: OnceLock<Mutex<Vec<TestFile>>> = OnceLock::new();
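// The LazyLock only creates an empty list; the list is wrapped in an async mutex
// (rather than being built by the LazyLock initializer) to allow for async initialization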
static TESTFILES: LazyLock<Mutex<Vec<TestFile>>> = LazyLock::new(|| Mutex::new(vec![]));

/// Holds a temporary parquet file path and its size
#[derive(Debug, Clone)]
Expand Down
12 changes: 6 additions & 6 deletions datafusion/expr/src/udaf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ where
/// # Basic Example
/// ```
/// # use std::any::Any;
/// # use std::sync::OnceLock;
/// # use std::sync::LazyLock;
/// # use arrow::datatypes::DataType;
/// # use datafusion_common::{DataFusionError, plan_err, Result};
/// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility, Expr, Documentation};
Expand All @@ -360,14 +360,14 @@ where
/// }
/// }
///
/// static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
///
/// fn get_doc() -> &'static Documentation {
/// DOCUMENTATION.get_or_init(|| {
/// static DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
/// Documentation::builder(DOC_SECTION_AGGREGATE, "calculates a geometric mean", "geo_mean(2.0)")
/// .with_argument("arg1", "The Float64 number for the geometric mean")
/// .build()
/// })
/// });
///
/// fn get_doc() -> &'static Documentation {
/// &DOCUMENTATION
/// }
///
/// /// Implement the AggregateUDFImpl trait for GeoMeanUdf
Expand Down
12 changes: 6 additions & 6 deletions datafusion/expr/src/udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ impl ReturnInfo {
/// # Basic Example
/// ```
/// # use std::any::Any;
/// # use std::sync::OnceLock;
/// # use std::sync::LazyLock;
/// # use arrow::datatypes::DataType;
/// # use datafusion_common::{DataFusionError, plan_err, Result};
/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility};
Expand All @@ -453,14 +453,14 @@ impl ReturnInfo {
/// }
/// }
///
/// static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
///
/// fn get_doc() -> &'static Documentation {
/// DOCUMENTATION.get_or_init(|| {
/// static DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
/// Documentation::builder(DOC_SECTION_MATH, "Add one to an int32", "add_one(2)")
/// .with_argument("arg1", "The int32 number to add one to")
/// .build()
/// })
/// });
///
/// fn get_doc() -> &'static Documentation {
/// &DOCUMENTATION
/// }
///
/// /// Implement the ScalarUDFImpl trait for AddOne
Expand Down
14 changes: 7 additions & 7 deletions datafusion/expr/src/udwf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ where
/// # Basic Example
/// ```
/// # use std::any::Any;
/// # use std::sync::OnceLock;
/// # use std::sync::LazyLock;
/// # use arrow::datatypes::{DataType, Field};
/// # use datafusion_common::{DataFusionError, plan_err, Result};
/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt, Documentation};
Expand All @@ -257,14 +257,14 @@ where
/// }
/// }
///
/// static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
/// static DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
/// Documentation::builder(DOC_SECTION_ANALYTICAL, "smooths the windows", "smooth_it(2)")
/// .with_argument("arg1", "The int32 number to smooth by")
/// .build()
/// });
///
/// fn get_doc() -> &'static Documentation {
/// DOCUMENTATION.get_or_init(|| {
/// Documentation::builder(DOC_SECTION_ANALYTICAL, "smooths the windows", "smooth_it(2)")
/// .with_argument("arg1", "The int32 number to smooth by")
/// .build()
/// })
/// &DOCUMENTATION
/// }
///
/// /// Implement the WindowUDFImpl trait for SmoothIt
Expand Down
62 changes: 31 additions & 31 deletions datafusion/functions-aggregate/src/bit_and_or_xor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use datafusion_expr::{
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign};
use std::sync::OnceLock;
use std::sync::LazyLock;

/// This macro helps create group accumulators based on bitwise operations typically used internally
/// and might not be necessary for users to call directly.
Expand Down Expand Up @@ -134,46 +134,46 @@ macro_rules! make_bitwise_udaf_expr_and_func {
};
}

static BIT_AND_DOC: OnceLock<Documentation> = OnceLock::new();
/// Documentation for the `bit_and` aggregate function, built on first access
/// and handed out by `get_bit_and_doc`.
static BIT_AND_DOC: LazyLock<Documentation> = LazyLock::new(|| {
Documentation::builder(
DOC_SECTION_GENERAL,
"Computes the bitwise AND of all non-null input values.",
"bit_and(expression)",
)
.with_standard_argument("expression", Some("Integer"))
.build()
});

fn get_bit_and_doc() -> &'static Documentation {
BIT_AND_DOC.get_or_init(|| {
Documentation::builder(
DOC_SECTION_GENERAL,
"Computes the bitwise AND of all non-null input values.",
"bit_and(expression)",
)
.with_standard_argument("expression", Some("Integer"))
.build()
})
&BIT_AND_DOC
}

static BIT_OR_DOC: OnceLock<Documentation> = OnceLock::new();
/// Documentation for the `bit_or` aggregate function, built on first access
/// and handed out by `get_bit_or_doc`.
static BIT_OR_DOC: LazyLock<Documentation> = LazyLock::new(|| {
Documentation::builder(
DOC_SECTION_GENERAL,
"Computes the bitwise OR of all non-null input values.",
"bit_or(expression)",
)
.with_standard_argument("expression", Some("Integer"))
.build()
});

fn get_bit_or_doc() -> &'static Documentation {
BIT_OR_DOC.get_or_init(|| {
Documentation::builder(
DOC_SECTION_GENERAL,
"Computes the bitwise OR of all non-null input values.",
"bit_or(expression)",
)
.with_standard_argument("expression", Some("Integer"))
.build()
})
&BIT_OR_DOC
}

static BIT_XOR_DOC: OnceLock<Documentation> = OnceLock::new();
/// Documentation for the `bit_xor` aggregate function, built on first access
/// and handed out by `get_bit_xor_doc`.
static BIT_XOR_DOC: LazyLock<Documentation> = LazyLock::new(|| {
Documentation::builder(
DOC_SECTION_GENERAL,
"Computes the bitwise exclusive OR of all non-null input values.",
"bit_xor(expression)",
)
.with_standard_argument("expression", Some("Integer"))
.build()
});

fn get_bit_xor_doc() -> &'static Documentation {
BIT_XOR_DOC.get_or_init(|| {
Documentation::builder(
DOC_SECTION_GENERAL,
"Computes the bitwise exclusive OR of all non-null input values.",
"bit_xor(expression)",
)
.with_standard_argument("expression", Some("Integer"))
.build()
})
&BIT_XOR_DOC
}

make_bitwise_udaf_expr_and_func!(
Expand Down
Loading