I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,258 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Index,
Series,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays.categorical import CategoricalAccessor
from pandas.core.indexes.accessors import Properties
class TestCatAccessor:
@pytest.mark.parametrize(
"method",
[
lambda x: x.cat.set_categories([1, 2, 3]),
lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
lambda x: x.cat.rename_categories([1, 2, 3]),
lambda x: x.cat.remove_unused_categories(),
lambda x: x.cat.remove_categories([2]),
lambda x: x.cat.add_categories([4]),
lambda x: x.cat.as_ordered(),
lambda x: x.cat.as_unordered(),
],
)
def test_getname_categorical_accessor(self, method):
# GH#17509
ser = Series([1, 2, 3], name="A").astype("category")
expected = "A"
result = method(ser).name
assert result == expected
def test_cat_accessor(self):
ser = Series(Categorical(["a", "b", np.nan, "a"]))
tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
assert not ser.cat.ordered, False
exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
res = ser.cat.set_categories(["b", "a"])
tm.assert_categorical_equal(res.values, exp)
ser[:] = "a"
ser = ser.cat.remove_unused_categories()
tm.assert_index_equal(ser.cat.categories, Index(["a"]))
def test_cat_accessor_api(self):
# GH#9322
assert Series.cat is CategoricalAccessor
ser = Series(list("aabbcde")).astype("category")
assert isinstance(ser.cat, CategoricalAccessor)
invalid = Series([1])
with pytest.raises(AttributeError, match="only use .cat accessor"):
invalid.cat
assert not hasattr(invalid, "cat")
def test_cat_accessor_no_new_attributes(self):
# https://github.com/pandas-dev/pandas/issues/10673
cat = Series(list("aabbcde")).astype("category")
with pytest.raises(AttributeError, match="You cannot add any new attribute"):
cat.cat.xlabel = "a"
def test_categorical_delegations(self):
# invalid accessor
msg = r"Can only use \.cat accessor with a 'category' dtype"
with pytest.raises(AttributeError, match=msg):
Series([1, 2, 3]).cat
with pytest.raises(AttributeError, match=msg):
Series([1, 2, 3]).cat()
with pytest.raises(AttributeError, match=msg):
Series(["a", "b", "c"]).cat
with pytest.raises(AttributeError, match=msg):
Series(np.arange(5.0)).cat
with pytest.raises(AttributeError, match=msg):
Series([Timestamp("20130101")]).cat
# Series should delegate calls to '.categories', '.codes', '.ordered'
# and the methods '.set_categories()' 'drop_unused_categories()' to the
# categorical
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
exp_categories = Index(["a", "b", "c"])
tm.assert_index_equal(ser.cat.categories, exp_categories)
ser = ser.cat.rename_categories([1, 2, 3])
exp_categories = Index([1, 2, 3])
tm.assert_index_equal(ser.cat.categories, exp_categories)
exp_codes = Series([0, 1, 2, 0], dtype="int8")
tm.assert_series_equal(ser.cat.codes, exp_codes)
assert ser.cat.ordered
ser = ser.cat.as_unordered()
assert not ser.cat.ordered
ser = ser.cat.as_ordered()
assert ser.cat.ordered
# reorder
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
exp_categories = Index(["c", "b", "a"])
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
ser = ser.cat.set_categories(["c", "b", "a"])
tm.assert_index_equal(ser.cat.categories, exp_categories)
tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
tm.assert_numpy_array_equal(ser.__array__(), exp_values)
# remove unused categories
ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
exp_categories = Index(["a", "b"])
exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
ser = ser.cat.remove_unused_categories()
tm.assert_index_equal(ser.cat.categories, exp_categories)
tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
tm.assert_numpy_array_equal(ser.__array__(), exp_values)
# This method is likely to be confused, so test that it raises an error
# on wrong inputs:
msg = "'Series' object has no attribute 'set_categories'"
with pytest.raises(AttributeError, match=msg):
ser.set_categories([4, 3, 2, 1])
# right: ser.cat.set_categories([4,3,2,1])
# GH#18862 (let Series.cat.rename_categories take callables)
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
result = ser.cat.rename_categories(lambda x: x.upper())
expected = Series(
Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"idx",
[
date_range("1/1/2015", periods=5),
date_range("1/1/2015", periods=5, tz="MET"),
period_range("1/1/2015", freq="D", periods=5),
timedelta_range("1 days", "10 days"),
],
)
def test_dt_accessor_api_for_categorical(self, idx):
# https://github.com/pandas-dev/pandas/issues/10661
ser = Series(idx)
cat = ser.astype("category")
# only testing field (like .day)
# and bool (is_month_start)
attr_names = type(ser._values)._datetimelike_ops
assert isinstance(cat.dt, Properties)
special_func_defs = [
("strftime", ("%Y-%m-%d",), {}),
("round", ("D",), {}),
("floor", ("D",), {}),
("ceil", ("D",), {}),
("asfreq", ("D",), {}),
("as_unit", ("s"), {}),
]
if idx.dtype == "M8[ns]":
# exclude dt64tz since that is already localized and would raise
tup = ("tz_localize", ("UTC",), {})
special_func_defs.append(tup)
elif idx.dtype.kind == "M":
# exclude dt64 since that is not localized so would raise
tup = ("tz_convert", ("EST",), {})
special_func_defs.append(tup)
_special_func_names = [f[0] for f in special_func_defs]
_ignore_names = ["components", "tz_localize", "tz_convert"]
func_names = [
fname
for fname in dir(ser.dt)
if not (
fname.startswith("_")
or fname in attr_names
or fname in _special_func_names
or fname in _ignore_names
)
]
func_defs = [(fname, (), {}) for fname in func_names]
func_defs.extend(
f_def for f_def in special_func_defs if f_def[0] in dir(ser.dt)
)
for func, args, kwargs in func_defs:
warn_cls = []
if func == "to_period" and getattr(idx, "tz", None) is not None:
# dropping TZ
warn_cls.append(UserWarning)
if func == "to_pydatetime":
# deprecated to return Index[object]
warn_cls.append(FutureWarning)
if warn_cls:
warn_cls = tuple(warn_cls)
else:
warn_cls = None
with tm.assert_produces_warning(warn_cls):
res = getattr(cat.dt, func)(*args, **kwargs)
exp = getattr(ser.dt, func)(*args, **kwargs)
tm.assert_equal(res, exp)
for attr in attr_names:
res = getattr(cat.dt, attr)
exp = getattr(ser.dt, attr)
tm.assert_equal(res, exp)
def test_dt_accessor_api_for_categorical_invalid(self):
invalid = Series([1, 2, 3]).astype("category")
msg = "Can only use .dt accessor with datetimelike"
with pytest.raises(AttributeError, match=msg):
invalid.dt
assert not hasattr(invalid, "str")
def test_set_categories_setitem(self):
# GH#43334
df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"])
df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"])
# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
df["Survived"] = Categorical(
df["Survived"], categories=["No", "Yes"], ordered=False
)
# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
def test_categorical_of_booleans_is_boolean(self):
# https://github.com/pandas-dev/pandas/issues/46313
df = DataFrame(
{"int_cat": [1, 2, 3], "bool_cat": [True, False, False]}, dtype="category"
)
value = df["bool_cat"].cat.categories.dtype
expected = np.dtype(np.bool_)
assert value is expected

View File

@ -0,0 +1,843 @@
import calendar
from datetime import (
date,
datetime,
time,
)
import locale
import unicodedata
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.timezones import maybe_get_tz
from pandas.errors import SettingWithCopyError
from pandas.core.dtypes.common import (
is_integer_dtype,
is_list_like,
)
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Period,
PeriodIndex,
Series,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
# Names the tests below expect to find on ``Series.dt``, split per array type
# into the array class's ``_datetimelike_ops`` and a hand-maintained list of
# methods.  ``test_dt_accessor_limited_display_api`` compares their union with
# the public names reported by ``get_dir``.

# Period-backed Series.
ok_for_period = PeriodArray._datetimelike_ops
ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
# datetime64-backed Series (tz-naive and tz-aware).
ok_for_dt = DatetimeArray._datetimelike_ops
ok_for_dt_methods = [
    "to_period",
    "to_pydatetime",
    "tz_localize",
    "tz_convert",
    "normalize",
    "strftime",
    "round",
    "floor",
    "ceil",
    "day_name",
    "month_name",
    "isocalendar",
    "as_unit",
]
# timedelta64-backed Series.
ok_for_td = TimedeltaArray._datetimelike_ops
ok_for_td_methods = [
    "components",
    "to_pytimedelta",
    "total_seconds",
    "round",
    "floor",
    "ceil",
    "as_unit",
]
def get_dir(ser):
    """Return the sorted, de-duplicated public names listed by ``ser.dt``.

    Names starting with an underscore are left out; this is what the
    limited-display-API test compares against.
    """
    public_names = {name for name in ser.dt.__dir__() if not name.startswith("_")}
    return sorted(public_names)
class TestSeriesDatetimeValues:
def _compare(self, ser, name):
# GH 7207, 11128
# test .dt namespace accessor
def get_expected(ser, prop):
result = getattr(Index(ser._values), prop)
if isinstance(result, np.ndarray):
if is_integer_dtype(result):
result = result.astype("int64")
elif not is_list_like(result) or isinstance(result, DataFrame):
return result
return Series(result, index=ser.index, name=ser.name)
left = getattr(ser.dt, name)
right = get_expected(ser, name)
if not (is_list_like(left) and is_list_like(right)):
assert left == right
elif isinstance(left, DataFrame):
tm.assert_frame_equal(left, right)
else:
tm.assert_series_equal(left, right)
@pytest.mark.parametrize("freq", ["D", "s", "ms"])
def test_dt_namespace_accessor_datetime64(self, freq):
# GH#7207, GH#11128
# test .dt namespace accessor
# datetimeindex
dti = date_range("20130101", periods=5, freq=freq)
ser = Series(dti, name="xxx")
for prop in ok_for_dt:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_dt_methods:
getattr(ser.dt, prop)
msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.dt.to_pydatetime()
assert isinstance(result, np.ndarray)
assert result.dtype == object
result = ser.dt.tz_localize("US/Eastern")
exp_values = DatetimeIndex(ser.values).tz_localize("US/Eastern")
expected = Series(exp_values, index=ser.index, name="xxx")
tm.assert_series_equal(result, expected)
tz_result = result.dt.tz
assert str(tz_result) == "US/Eastern"
freq_result = ser.dt.freq
assert freq_result == DatetimeIndex(ser.values, freq="infer").freq
# let's localize, then convert
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
exp_values = (
DatetimeIndex(ser.values).tz_localize("UTC").tz_convert("US/Eastern")
)
expected = Series(exp_values, index=ser.index, name="xxx")
tm.assert_series_equal(result, expected)
def test_dt_namespace_accessor_datetime64tz(self):
# GH#7207, GH#11128
# test .dt namespace accessor
# datetimeindex with tz
dti = date_range("20130101", periods=5, tz="US/Eastern")
ser = Series(dti, name="xxx")
for prop in ok_for_dt:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_dt_methods:
getattr(ser.dt, prop)
msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.dt.to_pydatetime()
assert isinstance(result, np.ndarray)
assert result.dtype == object
result = ser.dt.tz_convert("CET")
expected = Series(ser._values.tz_convert("CET"), index=ser.index, name="xxx")
tm.assert_series_equal(result, expected)
tz_result = result.dt.tz
assert str(tz_result) == "CET"
freq_result = ser.dt.freq
assert freq_result == DatetimeIndex(ser.values, freq="infer").freq
def test_dt_namespace_accessor_timedelta(self):
# GH#7207, GH#11128
# test .dt namespace accessor
# timedelta index
cases = [
Series(
timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
),
Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"),
Series(
timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
name="xxx",
),
]
for ser in cases:
for prop in ok_for_td:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_td_methods:
getattr(ser.dt, prop)
result = ser.dt.components
assert isinstance(result, DataFrame)
tm.assert_index_equal(result.index, ser.index)
result = ser.dt.to_pytimedelta()
assert isinstance(result, np.ndarray)
assert result.dtype == object
result = ser.dt.total_seconds()
assert isinstance(result, Series)
assert result.dtype == "float64"
freq_result = ser.dt.freq
assert freq_result == TimedeltaIndex(ser.values, freq="infer").freq
def test_dt_namespace_accessor_period(self):
# GH#7207, GH#11128
# test .dt namespace accessor
# periodindex
pi = period_range("20130101", periods=5, freq="D")
ser = Series(pi, name="xxx")
for prop in ok_for_period:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_period_methods:
getattr(ser.dt, prop)
freq_result = ser.dt.freq
assert freq_result == PeriodIndex(ser.values).freq
def test_dt_namespace_accessor_index_and_values(self):
# both
index = date_range("20130101", periods=3, freq="D")
dti = date_range("20140204", periods=3, freq="s")
ser = Series(dti, index=index, name="xxx")
exp = Series(
np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx"
)
tm.assert_series_equal(ser.dt.year, exp)
exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx")
tm.assert_series_equal(ser.dt.month, exp)
exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx")
tm.assert_series_equal(ser.dt.second, exp)
exp = Series([ser.iloc[0]] * 3, index=index, name="xxx")
tm.assert_series_equal(ser.dt.normalize(), exp)
def test_dt_accessor_limited_display_api(self):
# tznaive
ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
results = get_dir(ser)
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods)))
# tzaware
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
results = get_dir(ser)
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods)))
# Period
idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
ser = Series(idx)
results = get_dir(ser)
tm.assert_almost_equal(
results, sorted(set(ok_for_period + ok_for_period_methods))
)
def test_dt_accessor_ambiguous_freq_conversions(self):
# GH#11295
# ambiguous time error on the conversions
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
exp_values = date_range(
"2015-01-01", "2016-01-01", freq="min", tz="UTC"
).tz_convert("America/Chicago")
# freq not preserved by tz_localize above
exp_values = exp_values._with_freq(None)
expected = Series(exp_values, name="xxx")
tm.assert_series_equal(ser, expected)
def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write):
# no setting allowed
ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
with pytest.raises(ValueError, match="modifications"):
ser.dt.hour = 5
# trying to set a copy
msg = "modifications to a property of a datetimelike.+not supported"
with pd.option_context("chained_assignment", "raise"):
if using_copy_on_write:
with tm.raises_chained_assignment_error():
ser.dt.hour[0] = 5
elif warn_copy_on_write:
with tm.assert_produces_warning(
FutureWarning, match="ChainedAssignmentError"
):
ser.dt.hour[0] = 5
else:
with pytest.raises(SettingWithCopyError, match=msg):
ser.dt.hour[0] = 5
@pytest.mark.parametrize(
"method, dates",
[
["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
],
)
def test_dt_round(self, method, dates):
# round
ser = Series(
pd.to_datetime(
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
),
name="xxx",
)
result = getattr(ser.dt, method)("D")
expected = Series(pd.to_datetime(dates), name="xxx")
tm.assert_series_equal(result, expected)
def test_dt_round_tz(self):
ser = Series(
pd.to_datetime(
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
),
name="xxx",
)
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")
exp_values = pd.to_datetime(
["2012-01-01", "2012-01-01", "2012-01-01"]
).tz_localize("US/Eastern")
expected = Series(exp_values, name="xxx")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("method", ["ceil", "round", "floor"])
def test_dt_round_tz_ambiguous(self, method):
# GH 18946 round near "fall back" DST
df1 = DataFrame(
[
pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
],
columns=["date"],
)
df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")
# infer
result = getattr(df1.date.dt, method)("h", ambiguous="infer")
expected = df1["date"]
tm.assert_series_equal(result, expected)
# bool-array
result = getattr(df1.date.dt, method)("h", ambiguous=[True, False, False])
tm.assert_series_equal(result, expected)
# NaT
result = getattr(df1.date.dt, method)("h", ambiguous="NaT")
expected = df1["date"].copy()
expected.iloc[0:2] = pd.NaT
tm.assert_series_equal(result, expected)
# raise
with tm.external_error_raised(pytz.AmbiguousTimeError):
getattr(df1.date.dt, method)("h", ambiguous="raise")
@pytest.mark.parametrize(
"method, ts_str, freq",
[
["ceil", "2018-03-11 01:59:00-0600", "5min"],
["round", "2018-03-11 01:59:00-0600", "5min"],
["floor", "2018-03-11 03:01:00-0500", "2h"],
],
)
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
# GH 23324 round near "spring forward" DST
ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")])
result = getattr(ser.dt, method)(freq, nonexistent="shift_forward")
expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
tm.assert_series_equal(result, expected)
result = getattr(ser.dt, method)(freq, nonexistent="NaT")
expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
tm.assert_series_equal(result, expected)
with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
getattr(ser.dt, method)(freq, nonexistent="raise")
@pytest.mark.parametrize("freq", ["ns", "us", "1000us"])
def test_dt_round_nonnano_higher_resolution_no_op(self, freq):
# GH 52761
ser = Series(
["2020-05-31 08:00:00", "2000-12-31 04:00:05", "1800-03-14 07:30:20"],
dtype="datetime64[ms]",
)
expected = ser.copy()
result = ser.dt.round(freq)
tm.assert_series_equal(result, expected)
assert not np.shares_memory(ser.array._ndarray, result.array._ndarray)
def test_dt_namespace_accessor_categorical(self):
# GH 19468
dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
ser = Series(pd.Categorical(dti), name="foo")
result = ser.dt.year
expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo")
tm.assert_series_equal(result, expected)
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
# GH 27952
tz = tz_aware_fixture
datetimes = Series(
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
)
categorical = datetimes.astype("category")
result = categorical.dt.tz_localize(tz)
expected = datetimes.dt.tz_localize(tz)
tm.assert_series_equal(result, expected)
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
# GH 27952
tz = tz_aware_fixture
datetimes = Series(
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
)
categorical = datetimes.astype("category")
result = categorical.dt.tz_convert(tz)
expected = datetimes.dt.tz_convert(tz)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
def test_dt_other_accessors_categorical(self, accessor):
# GH 27952
datetimes = Series(
["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
)
categorical = datetimes.astype("category")
result = getattr(categorical.dt, accessor)
expected = getattr(datetimes.dt, accessor)
tm.assert_series_equal(result, expected)
def test_dt_accessor_no_new_attributes(self):
# https://github.com/pandas-dev/pandas/issues/10673
ser = Series(date_range("20130101", periods=5, freq="D"))
with pytest.raises(AttributeError, match="You cannot add any new attribute"):
ser.dt.xlabel = "a"
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
"time_locale", [None] + tm.get_locales() # type: ignore[operator]
)
def test_dt_accessor_datetime_name_accessors(self, time_locale):
# Test Monday -> Sunday and January -> December, in that sequence
if time_locale is None:
# If the time_locale is None, day-name and month_name should
# return the english attributes
expected_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
expected_months = [
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
]
else:
with tm.set_locale(time_locale, locale.LC_TIME):
expected_days = calendar.day_name[:]
expected_months = calendar.month_name[1:]
ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
english_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
name = name.capitalize()
assert ser.dt.day_name(locale=time_locale)[day] == name
assert ser.dt.day_name(locale=None)[day] == eng_name
ser = pd.concat([ser, Series([pd.NaT])])
assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1])
ser = Series(date_range(freq="ME", start="2012", end="2013"))
result = ser.dt.month_name(locale=time_locale)
expected = Series([month.capitalize() for month in expected_months])
# work around https://github.com/pandas-dev/pandas/issues/22342
result = result.str.normalize("NFD")
expected = expected.str.normalize("NFD")
tm.assert_series_equal(result, expected)
for s_date, expected in zip(ser, expected_months):
result = s_date.month_name(locale=time_locale)
expected = expected.capitalize()
result = unicodedata.normalize("NFD", result)
expected = unicodedata.normalize("NFD", expected)
assert result == expected
ser = pd.concat([ser, Series([pd.NaT])])
assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1])
def test_strftime(self):
# GH 10086
ser = Series(date_range("20130101", periods=5))
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
expected = Series(
[
"2015/02/03 11-22-33",
"2015/02/04 11-22-33",
"2015/02/05 11-22-33",
"2015/02/06 11-22-33",
"2015/02/07 11-22-33",
]
)
tm.assert_series_equal(result, expected)
ser = Series(period_range("20130101", periods=5))
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
expected = Series(
[
"2015/02/03 11-22-33",
"2015/02/03 11-22-34",
"2015/02/03 11-22-35",
"2015/02/03 11-22-36",
"2015/02/03 11-22-37",
]
)
tm.assert_series_equal(result, expected)
def test_strftime_dt64_days(self):
ser = Series(date_range("20130101", periods=5))
ser.iloc[0] = pd.NaT
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
[np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
datetime_index = date_range("20150301", periods=5)
result = datetime_index.strftime("%Y/%m/%d")
expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype=np.object_,
)
# dtype may be S10 or U10 depending on python version
tm.assert_index_equal(result, expected)
def test_strftime_period_days(self, using_infer_string):
period_index = period_range("20150301", periods=5)
result = period_index.strftime("%Y/%m/%d")
expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype="=U10",
)
if using_infer_string:
expected = expected.astype("string[pyarrow_numpy]")
tm.assert_index_equal(result, expected)
def test_strftime_dt64_microsecond_resolution(self):
ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
result = ser.dt.strftime("%Y-%m-%d %H:%M:%S")
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
tm.assert_series_equal(result, expected)
def test_strftime_period_hours(self):
ser = Series(period_range("20130101", periods=4, freq="h"))
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S")
expected = Series(
[
"2013/01/01 00:00:00",
"2013/01/01 01:00:00",
"2013/01/01 02:00:00",
"2013/01/01 03:00:00",
]
)
tm.assert_series_equal(result, expected)
def test_strftime_period_minutes(self):
ser = Series(period_range("20130101", periods=4, freq="ms"))
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
expected = Series(
[
"2013/01/01 00:00:00.000",
"2013/01/01 00:00:00.001",
"2013/01/01 00:00:00.002",
"2013/01/01 00:00:00.003",
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
DatetimeIndex(["2019-01-01", pd.NaT]),
PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
],
)
def test_strftime_nat(self, data):
# GH 29578
ser = Series(data)
result = ser.dt.strftime("%Y-%m-%d")
expected = Series(["2019-01-01", np.nan])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data", [DatetimeIndex([pd.NaT]), PeriodIndex([pd.NaT], dtype="period[D]")]
)
def test_strftime_all_nat(self, data):
# https://github.com/pandas-dev/pandas/issues/45858
ser = Series(data)
with tm.assert_produces_warning(None):
result = ser.dt.strftime("%Y-%m-%d")
expected = Series([np.nan], dtype=object)
tm.assert_series_equal(result, expected)
def test_valid_dt_with_missing_values(self):
# GH 8689
ser = Series(date_range("20130101", periods=5, freq="D"))
ser.iloc[2] = pd.NaT
for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
expected = getattr(ser.dt, attr).copy()
expected.iloc[2] = np.nan
result = getattr(ser.dt, attr)
tm.assert_series_equal(result, expected)
result = ser.dt.date
expected = Series(
[
date(2013, 1, 1),
date(2013, 1, 2),
pd.NaT,
date(2013, 1, 4),
date(2013, 1, 5),
],
dtype="object",
)
tm.assert_series_equal(result, expected)
result = ser.dt.time
expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object")
tm.assert_series_equal(result, expected)
def test_dt_accessor_api(self):
# GH 9322
from pandas.core.indexes.accessors import (
CombinedDatetimelikeProperties,
DatetimeProperties,
)
assert Series.dt is CombinedDatetimelikeProperties
ser = Series(date_range("2000-01-01", periods=3))
assert isinstance(ser.dt, DatetimeProperties)
@pytest.mark.parametrize(
"ser",
[
Series(np.arange(5)),
Series(list("abcde")),
Series(np.random.default_rng(2).standard_normal(5)),
],
)
def test_dt_accessor_invalid(self, ser):
# GH#9322 check that series with incorrect dtypes don't have attr
with pytest.raises(AttributeError, match="only use .dt accessor"):
ser.dt
assert not hasattr(ser, "dt")
def test_dt_accessor_updates_on_inplace(self):
ser = Series(date_range("2018-01-01", periods=10))
ser[2] = None
return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True)
assert return_value is None
result = ser.dt.date
assert result[0] == result[2]
def test_date_tz(self):
# GH11757
rng = DatetimeIndex(
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
tz="US/Eastern",
)
ser = Series(rng)
expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
tm.assert_series_equal(ser.dt.date, expected)
tm.assert_series_equal(ser.apply(lambda x: x.date()), expected)
def test_dt_timetz_accessor(self, tz_naive_fixture):
# GH21358
tz = maybe_get_tz(tz_naive_fixture)
dtindex = DatetimeIndex(
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
)
ser = Series(dtindex)
expected = Series(
[time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
)
result = ser.dt.timetz
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"input_series, expected_output",
[
[["2020-01-01"], [[2020, 1, 3]]],
[[pd.NaT], [[np.nan, np.nan, np.nan]]],
[["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
[["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]],
# see GH#36032
[["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]],
[["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]],
],
)
def test_isocalendar(self, input_series, expected_output):
result = pd.to_datetime(Series(input_series)).dt.isocalendar()
expected_frame = DataFrame(
expected_output, columns=["year", "week", "day"], dtype="UInt32"
)
tm.assert_frame_equal(result, expected_frame)
def test_hour_index(self):
dt_series = Series(
date_range(start="2021-01-01", periods=5, freq="h"),
index=[2, 6, 7, 8, 11],
dtype="category",
)
result = dt_series.dt.hour
expected = Series(
[0, 1, 2, 3, 4],
dtype="int32",
index=[2, 6, 7, 8, 11],
)
tm.assert_series_equal(result, expected)
class TestSeriesPeriodValuesDtAccessor:
    """Tests for ``Series.dt`` involving period values."""

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="h"),
                Period("2016-01-01 01:00:00", freq="h"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="M"),
                Period("2016-01-01 00:01:00", freq="M"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="s"),
                Period("2016-01-01 00:00:01", freq="s"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        """``ser.dt.end_time`` matches ``Period.end_time`` element-wise (GH#17157).

        Checks that the time part of the Period is adjusted by end_time when
        the dt accessor is used on a Series.
        """
        periods = PeriodArray._from_sequence(np.asarray(input_vals))
        ser = Series(periods)
        via_accessor = ser.dt.end_time
        via_scalars = ser.apply(lambda per: per.end_time)
        tm.assert_series_equal(via_accessor, via_scalars)

    @pytest.mark.parametrize("input_vals", ["2001", "NaT"])
    def test_to_period(self, input_vals):
        """``.dt.to_period("D")`` matches direct Period[D] construction (GH#21205)."""
        wanted = Series([input_vals], dtype="Period[D]")
        stamps = Series([input_vals], dtype="datetime64[ns]")
        tm.assert_series_equal(stamps.dt.to_period("D"), wanted)
def test_normalize_pre_epoch_dates():
    """``.dt.normalize()`` also truncates timestamps before 1970 (GH: 36294)."""
    with_times = Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"])
    midnights = Series(["1969-01-01", "2016-01-01"])

    result = pd.to_datetime(with_times).dt.normalize()
    tm.assert_series_equal(result, pd.to_datetime(midnights))
def test_day_attribute_non_nano_beyond_int32():
    """``.dt.days`` on second-resolution timedeltas, incl. results above int32 (GH 52386)."""
    raw_seconds = [
        136457654736252,
        134736784364431,
        245345345545332,
        223432411,
        2343241,
        3634548734,
        23234,
    ]
    ser = Series(np.array(raw_seconds, dtype="timedelta64[s]"))

    whole_days = Series([1579371003, 1559453522, 2839645203, 2586, 27, 42066, 0])
    tm.assert_series_equal(ser.dt.days, whole_days)

View File

@ -0,0 +1,129 @@
import re
import pytest
from pandas import (
ArrowDtype,
Series,
)
import pandas._testing as tm
pa = pytest.importorskip("pyarrow")
from pandas.compat import pa_version_under11p0
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem(list_dtype):
    """``ser.list[i]`` takes element ``i`` of each list; a null row stays null."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(list_dtype))

    second_items = ser.list[1]

    tm.assert_series_equal(
        second_items, Series([2, None, None], dtype="int64[pyarrow]")
    )
def test_list_getitem_slice():
    """Slice through ``.list``; the outcome depends on ``pa_version_under11p0``."""
    list_type = ArrowDtype(pa.list_(pa.int64()))
    ser = Series([[1, 2, 3], [4, None, 5], None], dtype=list_type)
    key = slice(1, None, None)

    if not pa_version_under11p0:
        expected = Series([[2, 3], [None, 5], None], dtype=list_type)
        tm.assert_series_equal(ser.list[key], expected)
        return

    # When the version flag is set, slicing is expected to be refused.
    unsupported = "List slice not supported by pyarrow "
    with pytest.raises(NotImplementedError, match=unsupported):
        ser.list[key]
def test_list_len():
    """Check ``.list.len()`` on two lists and a null entry."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    lengths = ser.list.len()
    # The expectation is typed as int32[pyarrow], with the null row kept null.
    expected = Series([3, 2, None], dtype=ArrowDtype(pa.int32()))
    tm.assert_series_equal(lengths, expected)
def test_list_flatten():
    """Check that ``.list.flatten()`` yields the list elements as one Series."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    flattened = ser.list.flatten()
    # The null inside the second list is kept; the null row adds no element.
    expected = Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
    tm.assert_series_equal(flattened, expected)
def test_list_getitem_slice_invalid():
    """Slice with a zero step and expect an error chosen by the version flag."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))

    # Pick the expected exception up front so the slicing call appears once.
    if pa_version_under11p0:
        exc_type = NotImplementedError
        pattern = "List slice not supported by pyarrow "
    else:
        exc_type = pa.lib.ArrowInvalid
        pattern = re.escape("`step` must be >= 1")

    with pytest.raises(exc_type, match=pattern):
        ser.list[1:None:0]
def test_list_accessor_non_list_dtype():
    """Use ``.list`` on an int64[pyarrow] Series and expect AttributeError."""
    ser = Series([1, 2, 4], dtype=ArrowDtype(pa.int64()))
    msg = re.escape(
        "Can only use the '.list' accessor with 'list[pyarrow]' dtype, "
        "not int64[pyarrow]."
    )
    with pytest.raises(AttributeError, match=msg):
        ser.list[1:None:0]
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem_invalid_index(list_dtype):
    """Try a negative, a too-large and a string key; each must raise."""
    rows = [[1, 2, 3], [4, None, 5], None]
    ser = Series(rows, dtype=ArrowDtype(list_dtype))

    # (key, expected exception type, pattern searched for in the message)
    bad_keys = (
        (-1, pa.lib.ArrowInvalid, "Index -1 is out of bounds"),
        (5, pa.lib.ArrowInvalid, "Index 5 is out of bounds"),
        ("abc", ValueError, "key must be an int or slice, got str"),
    )
    for key, exc_type, pattern in bad_keys:
        with pytest.raises(exc_type, match=pattern):
            ser.list[key]
def test_list_accessor_not_iterable():
    """Call ``iter()`` on the ``.list`` accessor and expect TypeError."""
    rows = [[1, 2, 3], [4, None], None]
    ser = Series(rows, dtype=ArrowDtype(pa.list_(pa.int64())))
    msg = "'ListAccessor' object is not iterable"
    with pytest.raises(TypeError, match=msg):
        iter(ser.list)

View File

@ -0,0 +1,9 @@
from pandas import Series
class TestSparseAccessor:
    """Checks for the ``.sparse`` accessor on a Series."""

    def test_sparse_accessor_updates_on_inplace(self):
        """Drop two labels in place, then read the density via ``.sparse``."""
        ser = Series([1, 1, 2, 3], dtype="Sparse[int]")
        labels = [0, 1]
        outcome = ser.drop(labels, inplace=True)
        # The in-place drop is expected to hand back None.
        assert outcome is None
        assert ser.sparse.density == 1.0

View File

@ -0,0 +1,25 @@
import pytest
from pandas import Series
import pandas._testing as tm
class TestStrAccessor:
    """Checks for the ``.str`` accessor on a Series."""

    def test_str_attribute(self):
        """Compare accessor strip methods with the builtin ``str`` methods."""
        # GH#9068
        names = Series([" jack", "jill ", " jesse ", "frank"])
        for op in ("strip", "rstrip", "lstrip"):
            builtin_op = getattr(str, op)
            accessor_op = getattr(Series.str, op)
            expected = Series([builtin_op(val) for val in names.values])
            tm.assert_series_equal(accessor_op(names.str), expected)

        # str accessor only valid with string values
        numbers = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            numbers.str.repeat(2)

    def test_str_accessor_updates_on_inplace(self):
        """Drop one label in place, then check the length seen via ``.str``."""
        ser = Series(list("abc"))
        outcome = ser.drop([0], inplace=True)
        assert outcome is None
        lowered = ser.str.lower()
        assert len(lowered) == 2

View File

@ -0,0 +1,196 @@
import re
import pytest
from pandas.compat.pyarrow import (
pa_version_under11p0,
pa_version_under13p0,
)
from pandas import (
ArrowDtype,
DataFrame,
Index,
Series,
)
import pandas._testing as tm
pa = pytest.importorskip("pyarrow")
pc = pytest.importorskip("pyarrow.compute")
def test_struct_accessor_dtypes():
    """Check ``.struct.dtypes`` against a Series of ArrowDtypes keyed by field."""
    nested = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
        ]
    )
    outer = pa.struct(
        [
            ("int_col", pa.int64()),
            ("string_col", pa.string()),
            ("struct_col", nested),
        ]
    )
    ser = Series([], dtype=ArrowDtype(outer))

    field_dtypes = [
        ArrowDtype(pa.int64()),
        ArrowDtype(pa.string()),
        ArrowDtype(nested),
    ]
    expected = Series(
        field_dtypes,
        index=Index(["int_col", "string_col", "struct_col"]),
    )
    tm.assert_series_equal(ser.struct.dtypes, expected)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field():
    """Select a struct field by name and by position via ``.struct.field``."""
    index = Index([-100, 42, 123])
    rice = [1.0, 2.0, 3.0]
    maize = [-1, 0, 1]
    wheat = ["a", "b", "c"]
    crop_type = pa.struct(
        [
            ("rice", pa.float64()),
            ("maize", pa.int64()),
            ("wheat", pa.string()),
        ]
    )
    records = [
        {"rice": r, "maize": m, "wheat": w} for r, m, w in zip(rice, maize, wheat)
    ]
    ser = Series(records, dtype=ArrowDtype(crop_type), index=index)

    # Lookup by field name.
    by_name = ser.struct.field("maize")
    by_name_expected = Series(
        maize, dtype=ArrowDtype(pa.int64()), index=index, name="maize"
    )
    tm.assert_series_equal(by_name, by_name_expected)

    # Lookup by position; the expected Series carries the name "wheat".
    by_index = ser.struct.field(2)
    by_index_expected = Series(
        wheat, dtype=ArrowDtype(pa.string()), index=index, name="wheat"
    )
    tm.assert_series_equal(by_index, by_index_expected)
def test_struct_accessor_field_with_invalid_name_or_index():
    """Pass a float to ``.struct.field`` and expect ValueError."""
    struct_type = pa.struct([("field", pa.int64())])
    ser = Series([], dtype=ArrowDtype(struct_type))
    msg = "name_or_index must be an int, str,"
    with pytest.raises(ValueError, match=msg):
        ser.struct.field(1.1)
@pytest.mark.skipif(pa_version_under11p0, reason="pyarrow>=11.0.0 required")
def test_struct_accessor_explode():
    """Explode a struct Series and expect one DataFrame column per field."""
    index = Index([-100, 42, 123])
    sea_type = pa.struct([("sea", pa.string())])
    turtle_type = pa.struct(
        [
            ("painted", pa.int64()),
            ("snapping", sea_type),
        ]
    )
    ser = Series(
        [
            {"painted": 1, "snapping": {"sea": "green"}},
            {"painted": 2, "snapping": {"sea": "leatherback"}},
            {"painted": 3, "snapping": {"sea": "hawksbill"}},
        ],
        dtype=ArrowDtype(turtle_type),
        index=index,
    )

    # Expected columns: "snapping" keeps its nested struct dtype.
    painted = Series([1, 2, 3], index=index, dtype=ArrowDtype(pa.int64()))
    snapping = Series(
        [{"sea": "green"}, {"sea": "leatherback"}, {"sea": "hawksbill"}],
        index=index,
        dtype=ArrowDtype(sea_type),
    )
    expected = DataFrame({"painted": painted, "snapping": snapping})

    tm.assert_frame_equal(ser.struct.explode(), expected)
@pytest.mark.parametrize(
    "invalid",
    [
        pytest.param(Series([1, 2, 3], dtype="int64"), id="int64"),
        pytest.param(
            Series(["a", "b", "c"], dtype="string[pyarrow]"), id="string-pyarrow"
        ),
    ],
)
def test_struct_accessor_api_for_invalid(invalid):
    """Access ``.struct`` on a non-struct Series and expect AttributeError."""
    # The message is expected to name the offending dtype.
    msg = re.escape(
        "Can only use the '.struct' accessor with 'struct[pyarrow]' dtype, "
        f"not {invalid.dtype}."
    )
    with pytest.raises(AttributeError, match=msg):
        invalid.struct
@pytest.mark.parametrize(
    ["indices", "name"],
    [
        (0, "int_col"),
        ([1, 2], "str_col"),
        (pc.field("int_col"), "int_col"),
        ("int_col", "int_col"),
        (b"string_col", b"string_col"),
        ([b"string_col"], "string_col"),
    ],
)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field_expanded(indices, name):
    """Compare ``.struct.field`` with ``pc.struct_field`` for each selector."""
    inner_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("float_col", pa.float64()),
            ("str_col", pa.string()),
        ]
    )
    arrow_type = pa.struct(
        [
            ("int_col", pa.int64()),
            ("struct_col", inner_type),
            (b"string_col", pa.string()),
        ]
    )
    data = pa.array([], type=arrow_type)
    ser = Series(data, dtype=ArrowDtype(arrow_type))

    result = ser.struct.field(indices)
    expected = pc.struct_field(data, indices)

    # Compare the underlying pyarrow data, then the name given to the result.
    tm.assert_equal(result.array._pa_array.combine_chunks(), expected)
    assert result.name == name