Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 169 additions & 10 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,37 +295,94 @@


def isnan(expr: Expr) -> Expr:
"""Returns true if a given number is +NaN or -NaN otherwise returns false."""
"""Returns true if a given number is +NaN or -NaN otherwise returns false.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1.0, np.nan]})
>>> result = df.select(dfn.functions.isnan(dfn.col("a")).alias("isnan"))
>>> result.collect_column("isnan")[1].as_py()
True
"""
return Expr(f.isnan(expr.expr))


def nullif(expr1: Expr, expr2: Expr) -> Expr:
"""Returns NULL if expr1 equals expr2; otherwise it returns expr1.

This can be used to perform the inverse operation of the COALESCE expression.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2], "b": [1, 3]})
>>> result = df.select(
... dfn.functions.nullif(dfn.col("a"), dfn.col("b")).alias("nullif"))
>>> result.collect_column("nullif").to_pylist()
[None, 2]
"""
return Expr(f.nullif(expr1.expr, expr2.expr))


def encode(expr: Expr, encoding: Expr) -> Expr:
"""Encode the ``input``, using the ``encoding``. encoding can be base64 or hex."""
"""Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["hello"]})
>>> result = df.select(
... dfn.functions.encode(dfn.col("a"), dfn.lit("base64")).alias("enc"))
>>> result.collect_column("enc")[0].as_py()
'aGVsbG8'
"""
return Expr(f.encode(expr.expr, encoding.expr))


def decode(expr: Expr, encoding: Expr) -> Expr:
"""Decode the ``input``, using the ``encoding``. encoding can be base64 or hex."""
"""Decode the ``input``, using the ``encoding``. encoding can be base64 or hex.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["aGVsbG8="]})
>>> result = df.select(
... dfn.functions.decode(dfn.col("a"), dfn.lit("base64")).alias("dec"))
>>> result.collect_column("dec")[0].as_py()
b'hello'
"""
return Expr(f.decode(expr.expr, encoding.expr))


def array_to_string(expr: Expr, delimiter: Expr) -> Expr:
"""Converts each element to its text representation."""
"""Converts each element to its text representation.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
>>> result = df.select(
... dfn.functions.array_to_string(dfn.col("a"), dfn.lit(",")).alias("s"))
>>> result.collect_column("s")[0].as_py()
'1,2,3'
"""
return Expr(f.array_to_string(expr.expr, delimiter.expr.cast(pa.string())))


def array_join(expr: Expr, delimiter: Expr) -> Expr:
"""Converts each element to its text representation.

This is an alias for :py:func:`array_to_string`.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [[1, 2, 3]]})
>>> result = df.select(
... dfn.functions.array_join(dfn.col("a"), dfn.lit("-")).alias("s"))
>>> result.collect_column("s")[0].as_py()
'1-2-3'
"""
return array_to_string(expr, delimiter)

Expand All @@ -334,6 +391,15 @@ def list_to_string(expr: Expr, delimiter: Expr) -> Expr:
"""Converts each element to its text representation.

This is an alias for :py:func:`array_to_string`.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [[4, 5, 6]]})
>>> result = df.select(
... dfn.functions.list_to_string(dfn.col("a"), dfn.lit(",")).alias("s"))
>>> result.collect_column("s")[0].as_py()
'4,5,6'
"""
return array_to_string(expr, delimiter)

Expand All @@ -342,12 +408,31 @@ def list_join(expr: Expr, delimiter: Expr) -> Expr:
"""Converts each element to its text representation.

This is an alias for :py:func:`array_to_string`.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [[7, 8, 9]]})
>>> result = df.select(
... dfn.functions.list_join(dfn.col("a"), dfn.lit("|")).alias("s"))
>>> result.collect_column("s")[0].as_py()
'7|8|9'
"""
return array_to_string(expr, delimiter)


def in_list(arg: Expr, values: list[Expr], negated: bool = False) -> Expr:
"""Returns whether the argument is contained within the list ``values``."""
"""Returns whether the argument is contained within the list ``values``.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> result = df.select(
... dfn.functions.in_list(dfn.col("a"), [dfn.lit(1), dfn.lit(3)]).alias("in"))
>>> result.collect_column("in").to_pylist()
[True, False, True]
"""
values = [v.expr for v in values]
return Expr(f.in_list(arg.expr, values, negated))

Expand All @@ -357,6 +442,15 @@ def digest(value: Expr, method: Expr) -> Expr:

Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s,
blake2b, and blake3.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["hello"]})
>>> result = df.select(
... dfn.functions.digest(dfn.col("a"), dfn.lit("md5")).alias("d"))
>>> len(result.collect_column("d")[0].as_py()) > 0
True
"""
return Expr(f.digest(value.expr, method.expr))

Expand All @@ -365,6 +459,14 @@ def concat(*args: Expr) -> Expr:
"""Concatenates the text representations of all the arguments.

NULL arguments are ignored.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["hello"], "b": [" world"]})
>>> result = df.select(dfn.functions.concat(dfn.col("a"), dfn.col("b")).alias("c"))
>>> result.collect_column("c")[0].as_py()
'hello world'
"""
args = [arg.expr for arg in args]
return Expr(f.concat(args))
Expand All @@ -374,32 +476,61 @@ def concat_ws(separator: str, *args: Expr) -> Expr:
"""Concatenates the list ``args`` with the separator.

``NULL`` arguments are ignored. ``separator`` should not be ``NULL``.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": ["hello"], "b": ["world"]})
>>> result = df.select(
... dfn.functions.concat_ws("-", dfn.col("a"), dfn.col("b")).alias("c"))
>>> result.collect_column("c")[0].as_py()
'hello-world'
"""
args = [arg.expr for arg in args]
return Expr(f.concat_ws(separator, args))


def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr:
"""Creates a new sort expression."""
"""Creates a new sort expression.

Examples:
---------
>>> sort_expr = dfn.functions.order_by(dfn.col("a"), ascending=False)
>>> sort_expr.ascending()
False
"""
return SortExpr(expr, ascending=ascending, nulls_first=nulls_first)


def alias(expr: Expr, name: str, metadata: dict[str, str] | None = None) -> Expr:
"""Creates an alias expression with an optional metadata dictionary.

Args:
Parameters:
-----------
expr: The expression to alias
name: The alias name
metadata: Optional metadata to attach to the column

Returns:
An expression with the given alias
Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2]})
>>> df.select(dfn.functions.alias(dfn.col("a"), "b")).collect_column("b")[0].as_py()
1
"""
return Expr(f.alias(expr.expr, name, metadata))


def col(name: str) -> Expr:
"""Creates a column reference expression."""
"""Creates a column reference expression.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> df.select(dfn.functions.col("a")).collect_column("a")[0].as_py()
1
"""
return Expr(f.col(name))


Expand All @@ -413,6 +544,14 @@ def count_star(filter: Expr | None = None) -> Expr:

Args:
filter: If provided, only count rows for which the filter is True

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> result = df.aggregate([], [dfn.functions.count_star().alias("cnt")])
>>> result.collect_column("cnt")[0].as_py()
3
"""
return count(Expr.literal(1), filter=filter)

Expand All @@ -423,6 +562,16 @@ def case(expr: Expr) -> CaseBuilder:
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
detailed usage.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> result = df.select(
... dfn.functions.case(dfn.col("a")).when(dfn.lit(1),
... dfn.lit("one")).otherwise(dfn.lit("other")).alias("c"))
>>> result.collect_column("c")[0].as_py()
'one'
"""
return CaseBuilder(f.case(expr.expr))

Expand All @@ -433,6 +582,16 @@ def when(when: Expr, then: Expr) -> CaseBuilder:
Create a :py:class:`~datafusion.expr.CaseBuilder` to match cases for the
expression ``expr``. See :py:class:`~datafusion.expr.CaseBuilder` for
detailed usage.

Examples:
---------
>>> ctx = dfn.SessionContext()
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
>>> result = df.select(
... dfn.functions.when(dfn.col("a") > dfn.lit(2),
... dfn.lit("big")).otherwise(dfn.lit("small")).alias("c"))
>>> result.collect_column("c")[2].as_py()
'big'
"""
return CaseBuilder(f.when(when.expr, then.expr))

Expand Down
Loading