I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,27 @@
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
)
# Note: identical the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
# a MultiIndex used to test the general functionality of the
# general functionality of this object
major_axis = Index(["foo", "bar", "baz", "qux"])
minor_axis = Index(["one", "two"])
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ["first", "second"]
mi = MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names,
verify_integrity=False,
)
return mi

View File

@ -0,0 +1,263 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
date_range,
period_range,
)
import pandas._testing as tm
def test_infer_objects(idx):
with pytest.raises(NotImplementedError, match="to_frame"):
idx.infer_objects()
def test_shift(idx):
# GH8083 test the base class for shift
msg = (
"This method is only implemented for DatetimeIndex, PeriodIndex and "
"TimedeltaIndex; Got type MultiIndex"
)
with pytest.raises(NotImplementedError, match=msg):
idx.shift(1)
with pytest.raises(NotImplementedError, match=msg):
idx.shift(1, 2)
def test_groupby(idx):
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
labels = idx.tolist()
exp = {1: labels[:3], 2: labels[3:]}
tm.assert_dict_equal(groups, exp)
# GH5620
groups = idx.groupby(idx)
exp = {key: [key] for key in idx}
tm.assert_dict_equal(groups, exp)
def test_truncate_multiindex():
# GH 34564 for MultiIndex level names check
major_axis = Index(list(range(4)))
minor_axis = Index(list(range(2)))
major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=["L1", "L2"],
)
result = index.truncate(before=1)
assert "foo" not in result.levels[0]
assert 1 in result.levels[0]
assert index.names == result.names
result = index.truncate(after=1)
assert 2 not in result.levels[0]
assert 1 in result.levels[0]
assert index.names == result.names
result = index.truncate(before=1, after=2)
assert len(result.levels[0]) == 2
assert index.names == result.names
msg = "after < before"
with pytest.raises(ValueError, match=msg):
index.truncate(3, 1)
# TODO: reshape
def test_reorder_levels(idx):
# this blows up
with pytest.raises(IndexError, match="^Too many levels"):
idx.reorder_levels([2, 1, 0])
def test_numpy_repeat():
reps = 2
numbers = [1, 2, 3]
names = np.array(["foo", "bar"])
m = MultiIndex.from_product([numbers, names], names=names)
expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(np.repeat(m, reps), expected)
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.repeat(m, reps, axis=1)
def test_append_mixed_dtypes():
# GH 13660
dti = date_range("2011-01-01", freq="ME", periods=3)
dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
pi = period_range("2011-01", freq="M", periods=3)
mi = MultiIndex.from_arrays(
[[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
)
assert mi.nlevels == 6
res = mi.append(mi)
exp = MultiIndex.from_arrays(
[
[1, 2, 3, 1, 2, 3],
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
["a", "b", "c", "a", "b", "c"],
dti.append(dti),
dti_tz.append(dti_tz),
pi.append(pi),
]
)
tm.assert_index_equal(res, exp)
other = MultiIndex.from_arrays(
[
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
]
)
res = mi.append(other)
exp = MultiIndex.from_arrays(
[
[1, 2, 3, "x", "y", "z"],
[1.1, np.nan, 3.3, "x", "y", "z"],
["a", "b", "c", "x", "y", "z"],
dti.append(Index(["x", "y", "z"])),
dti_tz.append(Index(["x", "y", "z"])),
pi.append(Index(["x", "y", "z"])),
]
)
tm.assert_index_equal(res, exp)
def test_iter(idx):
result = list(idx)
expected = [
("foo", "one"),
("foo", "two"),
("bar", "one"),
("baz", "two"),
("qux", "one"),
("qux", "two"),
]
assert result == expected
def test_sub(idx):
first = idx
# - now raises (previously was set op difference)
msg = "cannot perform __sub__ with this index type: MultiIndex"
with pytest.raises(TypeError, match=msg):
first - idx[-3:]
with pytest.raises(TypeError, match=msg):
idx[-3:] - first
with pytest.raises(TypeError, match=msg):
idx[-3:] - first.tolist()
msg = "cannot perform __rsub__ with this index type: MultiIndex"
with pytest.raises(TypeError, match=msg):
first.tolist() - idx[-3:]
def test_map(idx):
# callable
index = idx
result = index.map(lambda x: x)
tm.assert_index_equal(result, index)
@pytest.mark.parametrize(
"mapper",
[
lambda values, idx: {i: e for e, i in zip(values, idx)},
lambda values, idx: pd.Series(values, idx),
],
)
def test_map_dictlike(idx, mapper):
identity = mapper(idx.values, idx)
# we don't infer to uint64 dtype for a dict
if idx.dtype == np.uint64 and isinstance(identity, dict):
expected = idx.astype("int64")
else:
expected = idx
result = idx.map(identity)
tm.assert_index_equal(result, expected)
# empty mappable
expected = Index([np.nan] * len(idx))
result = idx.map(mapper(expected, idx))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"func",
[
np.exp,
np.exp2,
np.expm1,
np.log,
np.log2,
np.log10,
np.log1p,
np.sqrt,
np.sin,
np.cos,
np.tan,
np.arcsin,
np.arccos,
np.arctan,
np.sinh,
np.cosh,
np.tanh,
np.arcsinh,
np.arccosh,
np.arctanh,
np.deg2rad,
np.rad2deg,
],
ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
# test ufuncs of numpy. see:
# https://numpy.org/doc/stable/reference/ufuncs.html
expected_exception = TypeError
msg = (
"loop of ufunc does not support argument 0 of type tuple which "
f"has no callable {func.__name__} method"
)
with pytest.raises(expected_exception, match=msg):
func(idx)
@pytest.mark.parametrize(
"func",
[np.isfinite, np.isinf, np.isnan, np.signbit],
ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
msg = (
f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
"could not be safely coerced to any supported types according to "
"the casting rule ''safe''"
)
with pytest.raises(TypeError, match=msg):
func(idx)

View File

@ -0,0 +1,30 @@
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas._testing as tm
def test_astype(idx):
expected = idx.copy()
actual = idx.astype("O")
tm.assert_copy(actual.levels, expected.levels)
tm.assert_copy(actual.codes, expected.codes)
assert actual.names == list(expected.names)
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
idx.astype(np.dtype(int))
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
# GH 18630
msg = "> 1 ndim Categorical are not supported at this time"
with pytest.raises(NotImplementedError, match=msg):
idx.astype(CategoricalDtype(ordered=ordered))
if ordered is False:
# dtype='category' defaults to ordered=False, so only test once
with pytest.raises(NotImplementedError, match=msg):
idx.astype("category")

View File

@ -0,0 +1,122 @@
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm
def test_numeric_compat(idx):
with pytest.raises(TypeError, match="cannot perform __mul__"):
idx * 1
with pytest.raises(TypeError, match="cannot perform __rmul__"):
1 * idx
div_err = "cannot perform __truediv__"
with pytest.raises(TypeError, match=div_err):
idx / 1
div_err = div_err.replace(" __", " __r")
with pytest.raises(TypeError, match=div_err):
1 / idx
with pytest.raises(TypeError, match="cannot perform __floordiv__"):
idx // 1
with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
1 // idx
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
msg = f"cannot perform {method}"
with pytest.raises(TypeError, match=msg):
getattr(idx, method)()
def test_inplace_mutation_resets_values():
levels = [["a", "b", "c"], [4]]
levels2 = [[1, 2, 3], ["a"]]
codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
mi1 = MultiIndex(levels=levels, codes=codes)
mi2 = MultiIndex(levels=levels2, codes=codes)
# instantiating MultiIndex should not access/cache _.values
assert "_values" not in mi1._cache
assert "_values" not in mi2._cache
vals = mi1.values.copy()
vals2 = mi2.values.copy()
# accessing .values should cache ._values
assert mi1._values is mi1._cache["_values"]
assert mi1.values is mi1._cache["_values"]
assert isinstance(mi1._cache["_values"], np.ndarray)
# Make sure level setting works
new_vals = mi1.set_levels(levels2).values
tm.assert_almost_equal(vals2, new_vals)
# Doesn't drop _values from _cache [implementation detail]
tm.assert_almost_equal(mi1._cache["_values"], vals)
# ...and values is still same too
tm.assert_almost_equal(mi1.values, vals)
# Make sure label setting works too
codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
exp_values = np.empty((6,), dtype=object)
exp_values[:] = [(1, "a")] * 6
# Must be 1d array of tuples
assert exp_values.shape == (6,)
new_mi = mi2.set_codes(codes2)
assert "_values" not in new_mi._cache
new_values = new_mi.values
assert "_values" in new_mi._cache
# Shouldn't change cache
tm.assert_almost_equal(mi2._cache["_values"], vals2)
# Should have correct values
tm.assert_almost_equal(exp_values, new_values)
def test_boxable_categorical_values():
cat = pd.Categorical(pd.date_range("2012-01-01", periods=3, freq="h"))
result = MultiIndex.from_product([["a", "b", "c"], cat]).values
expected = pd.Series(
[
("a", pd.Timestamp("2012-01-01 00:00:00")),
("a", pd.Timestamp("2012-01-01 01:00:00")),
("a", pd.Timestamp("2012-01-01 02:00:00")),
("b", pd.Timestamp("2012-01-01 00:00:00")),
("b", pd.Timestamp("2012-01-01 01:00:00")),
("b", pd.Timestamp("2012-01-01 02:00:00")),
("c", pd.Timestamp("2012-01-01 00:00:00")),
("c", pd.Timestamp("2012-01-01 01:00:00")),
("c", pd.Timestamp("2012-01-01 02:00:00")),
]
).values
tm.assert_numpy_array_equal(result, expected)
result = pd.DataFrame({"a": ["a", "b", "c"], "b": cat, "c": np.array(cat)}).values
expected = pd.DataFrame(
{
"a": ["a", "b", "c"],
"b": [
pd.Timestamp("2012-01-01 00:00:00"),
pd.Timestamp("2012-01-01 01:00:00"),
pd.Timestamp("2012-01-01 02:00:00"),
],
"c": [
pd.Timestamp("2012-01-01 00:00:00"),
pd.Timestamp("2012-01-01 01:00:00"),
pd.Timestamp("2012-01-01 02:00:00"),
],
}
).values
tm.assert_numpy_array_equal(result, expected)

View File

@ -0,0 +1,860 @@
from datetime import (
date,
datetime,
)
import itertools
import numpy as np
import pytest
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
def test_constructor_single_level():
result = MultiIndex(
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
)
assert isinstance(result, MultiIndex)
expected = Index(["foo", "bar", "baz", "qux"], name="first")
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["first"]
def test_constructor_no_levels():
msg = "non-zero number of levels/codes"
with pytest.raises(ValueError, match=msg):
MultiIndex(levels=[], codes=[])
msg = "Must pass both levels and codes"
with pytest.raises(TypeError, match=msg):
MultiIndex(levels=[])
with pytest.raises(TypeError, match=msg):
MultiIndex(codes=[])
def test_constructor_nonhashable_names():
# GH 20527
levels = [[1, 2], ["one", "two"]]
codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
names = (["foo"], ["bar"])
msg = r"MultiIndex\.name must be a hashable type"
with pytest.raises(TypeError, match=msg):
MultiIndex(levels=levels, codes=codes, names=names)
# With .rename()
mi = MultiIndex(
levels=[[1, 2], ["one", "two"]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=("foo", "bar"),
)
renamed = [["fooo"], ["barr"]]
with pytest.raises(TypeError, match=msg):
mi.rename(names=renamed)
# With .set_names()
with pytest.raises(TypeError, match=msg):
mi.set_names(names=renamed)
def test_constructor_mismatched_codes_levels(idx):
codes = [np.array([1]), np.array([2]), np.array([3])]
levels = ["a"]
msg = "Length of levels and codes must be the same"
with pytest.raises(ValueError, match=msg):
MultiIndex(levels=levels, codes=codes)
length_error = (
r"On level 0, code max \(3\) >= length of level \(1\)\. "
"NOTE: this index is in an inconsistent state"
)
label_error = r"Unequal code lengths: \[4, 2\]"
code_value_error = r"On level 0, code value \(-2\) < -1"
# important to check that it's looking at the right thing.
with pytest.raises(ValueError, match=length_error):
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
with pytest.raises(ValueError, match=label_error):
MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])
# external API
with pytest.raises(ValueError, match=length_error):
idx.copy().set_levels([["a"], ["b"]])
with pytest.raises(ValueError, match=label_error):
idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])
# test set_codes with verify_integrity=False
# the setting should not raise any value error
idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)
# code value smaller than -1
with pytest.raises(ValueError, match=code_value_error):
MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
def test_na_levels():
# GH26408
# test if codes are re-assigned value -1 for levels
# with missing values (NaN, NaT, None)
result = MultiIndex(
levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
)
expected = MultiIndex(
levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
)
tm.assert_index_equal(result, expected)
result = MultiIndex(
levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]]
)
expected = MultiIndex(
levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]]
)
tm.assert_index_equal(result, expected)
# verify set_levels and set_codes
result = MultiIndex(
levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
).set_levels([[np.nan, "s", pd.NaT, 128, None]])
tm.assert_index_equal(result, expected)
result = MultiIndex(
levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]]
).set_codes([[0, -1, 1, 2, 3, 4]])
tm.assert_index_equal(result, expected)
def test_copy_in_constructor():
levels = np.array(["a", "b", "c"])
codes = np.array([1, 1, 2, 0, 0, 1, 1])
val = codes[0]
mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
assert mi.codes[0][0] == val
codes[0] = 15
assert mi.codes[0][0] == val
val = levels[0]
levels[0] = "PANDA"
assert mi.levels[0][0] == val
# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
arrays = [
np.asarray(lev).take(level_codes)
for lev, level_codes in zip(idx.levels, idx.codes)
]
# list of arrays as input
result = MultiIndex.from_arrays(arrays, names=idx.names)
tm.assert_index_equal(result, idx)
# infer correctly
result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
assert result.levels[0].equals(Index([Timestamp("20130101")]))
assert result.levels[1].equals(Index(["a", "b"]))
def test_from_arrays_iterator(idx):
# GH 18434
arrays = [
np.asarray(lev).take(level_codes)
for lev, level_codes in zip(idx.levels, idx.codes)
]
# iterator as input
result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
tm.assert_index_equal(result, idx)
# invalid iterator input
msg = "Input must be a list / sequence of array-likes."
with pytest.raises(TypeError, match=msg):
MultiIndex.from_arrays(0)
def test_from_arrays_tuples(idx):
arrays = tuple(
tuple(np.asarray(lev).take(level_codes))
for lev, level_codes in zip(idx.levels, idx.codes)
)
# tuple of tuples as input
result = MultiIndex.from_arrays(arrays, names=idx.names)
tm.assert_index_equal(result, idx)
@pytest.mark.parametrize(
("idx1", "idx2"),
[
(
pd.period_range("2011-01-01", freq="D", periods=3),
pd.period_range("2015-01-01", freq="h", periods=3),
),
(
date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
),
(
pd.timedelta_range("1 days", freq="D", periods=3),
pd.timedelta_range("2 hours", freq="h", periods=3),
),
],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
result = MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_datetimelike_mixed():
idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
idx2 = date_range("2015-01-01 10:00", freq="h", periods=3)
idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
idx4 = pd.period_range("2011-01-01", freq="D", periods=3)
result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
tm.assert_index_equal(result.get_level_values(2), idx3)
tm.assert_index_equal(result.get_level_values(3), idx4)
result2 = MultiIndex.from_arrays(
[Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
)
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result2.get_level_values(2), idx3)
tm.assert_index_equal(result2.get_level_values(3), idx4)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_categorical():
# GH13743
idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)
result = MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
result3 = MultiIndex.from_arrays([idx1.values, idx2.values])
tm.assert_index_equal(result3.get_level_values(0), idx1)
tm.assert_index_equal(result3.get_level_values(1), idx2)
def test_from_arrays_empty():
# 0 levels
msg = "Must pass non-zero number of levels/codes"
with pytest.raises(ValueError, match=msg):
MultiIndex.from_arrays(arrays=[])
# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
assert isinstance(result, MultiIndex)
expected = Index([], name="A")
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]
# N levels
for N in [2, 3]:
arrays = [[]] * N
names = list("ABC")[:N]
result = MultiIndex.from_arrays(arrays=arrays, names=names)
expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"invalid_sequence_of_arrays",
[
1,
[1],
[1, 2],
[[1], 2],
[1, [2]],
"a",
["a"],
["a", "b"],
[["a"], "b"],
(1,),
(1, 2),
([1], 2),
(1, [2]),
"a",
("a",),
("a", "b"),
(["a"], "b"),
[(1,), 2],
[1, (2,)],
[("a",), "b"],
((1,), 2),
(1, (2,)),
(("a",), "b"),
],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
msg = "Input must be a list / sequence of array-likes"
with pytest.raises(TypeError, match=msg):
MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
@pytest.mark.parametrize(
"idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
# see gh-13599
msg = "^all arrays must be same length$"
with pytest.raises(ValueError, match=msg):
MultiIndex.from_arrays([idx1, idx2])
def test_from_arrays_respects_none_names():
# GH27292
a = Series([1, 2, 3], name="foo")
b = Series(["a", "b", "c"], name="bar")
result = MultiIndex.from_arrays([a, b], names=None)
expected = MultiIndex(
levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
)
tm.assert_index_equal(result, expected)
# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
msg = "Cannot infer number of levels from empty list"
with pytest.raises(TypeError, match=msg):
MultiIndex.from_tuples([])
expected = MultiIndex(
levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
)
# input tuples
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
tm.assert_index_equal(result, expected)
def test_from_tuples_iterator():
# GH 18434
# input iterator for tuples
expected = MultiIndex(
levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
)
result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"])
tm.assert_index_equal(result, expected)
# input non-iterables
msg = "Input must be a list / sequence of tuple-likes."
with pytest.raises(TypeError, match=msg):
MultiIndex.from_tuples(0)
def test_from_tuples_empty():
# GH 16777
result = MultiIndex.from_tuples([], names=["a", "b"])
expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
tm.assert_index_equal(result, expected)
def test_from_tuples_index_values(idx):
result = MultiIndex.from_tuples(idx)
assert (result.values == idx.values).all()
def test_tuples_with_name_string():
# GH 15110 and GH 14848
li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
msg = "Names should be list-like for a MultiIndex"
with pytest.raises(ValueError, match=msg):
Index(li, name="abc")
with pytest.raises(ValueError, match=msg):
Index(li, name="a")
def test_from_tuples_with_tuple_label():
# GH 15457
expected = pd.DataFrame(
[[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
).set_index(["a", "b"])
idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
result = pd.DataFrame([2, 3], columns=["c"], index=idx)
tm.assert_frame_equal(expected, result)
# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
# 0 levels
msg = "Must pass non-zero number of levels/codes"
with pytest.raises(ValueError, match=msg):
MultiIndex.from_product([])
def test_from_product_empty_one_level():
result = MultiIndex.from_product([[]], names=["A"])
expected = Index([], name="A")
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ["A"]
@pytest.mark.parametrize(
"first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
names = ["A", "B"]
result = MultiIndex.from_product([first, second], names=names)
expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
# GH12258
names = ["A", "B", "C"]
lvl2 = list(range(N))
result = MultiIndex.from_product([[], lvl2, []], names=names)
expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
msg = r"Input must be a list / sequence of iterables|Input must be list-like"
with pytest.raises(TypeError, match=msg):
MultiIndex.from_product(iterables=invalid_input)
def test_from_product_datetimeindex():
dt_index = date_range("2000-01-01", periods=2)
mi = MultiIndex.from_product([[1, 2], dt_index])
etalon = construct_1d_object_array_from_listlike(
[
(1, Timestamp("2000-01-01")),
(1, Timestamp("2000-01-02")),
(2, Timestamp("2000-01-01")),
(2, Timestamp("2000-01-02")),
]
)
tm.assert_numpy_array_equal(mi.values, etalon)
def test_from_product_rangeindex():
# RangeIndex is preserved by factorize, so preserved in levels
rng = Index(range(5))
other = ["a", "b"]
mi = MultiIndex.from_product([rng, other])
tm.assert_index_equal(mi._levels[0], rng, exact=True)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
# GH13743
first = ["foo", "bar"]
idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
expected = pd.CategoricalIndex(
list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered
)
result = MultiIndex.from_product([first, f(idx)])
tm.assert_index_equal(result.get_level_values(1), expected)
def test_from_product():
first = ["foo", "bar", "buz"]
second = ["a", "b", "c"]
names = ["first", "second"]
result = MultiIndex.from_product([first, second], names=names)
tuples = [
("foo", "a"),
("foo", "b"),
("foo", "c"),
("bar", "a"),
("bar", "b"),
("bar", "c"),
("buz", "a"),
("buz", "b"),
("buz", "c"),
]
expected = MultiIndex.from_tuples(tuples, names=names)
tm.assert_index_equal(result, expected)
def test_from_product_iterator():
# GH 18434
first = ["foo", "bar", "buz"]
second = ["a", "b", "c"]
names = ["first", "second"]
tuples = [
("foo", "a"),
("foo", "b"),
("foo", "c"),
("bar", "a"),
("bar", "b"),
("bar", "c"),
("buz", "a"),
("buz", "b"),
("buz", "c"),
]
expected = MultiIndex.from_tuples(tuples, names=names)
# iterator as input
result = MultiIndex.from_product(iter([first, second]), names=names)
tm.assert_index_equal(result, expected)
# Invalid non-iterable input
msg = "Input must be a list / sequence of iterables."
with pytest.raises(TypeError, match=msg):
MultiIndex.from_product(0)
@pytest.mark.parametrize(
"a, b, expected_names",
[
(
Series([1, 2, 3], name="foo"),
Series(["a", "b"], name="bar"),
["foo", "bar"],
),
(Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
([1, 2, 3], ["a", "b"], None),
],
)
def test_from_product_infer_names(a, b, expected_names):
# GH27292
result = MultiIndex.from_product([a, b])
expected = MultiIndex(
levels=[[1, 2, 3], ["a", "b"]],
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
names=expected_names,
)
tm.assert_index_equal(result, expected)
def test_from_product_respects_none_names():
# GH27292
a = Series([1, 2, 3], name="foo")
b = Series(["a", "b"], name="bar")
result = MultiIndex.from_product([a, b], names=None)
expected = MultiIndex(
levels=[[1, 2, 3], ["a", "b"]],
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
names=None,
)
tm.assert_index_equal(result, expected)
def test_from_product_readonly():
# GH#15286 passing read-only array to from_product
a = np.array(range(3))
b = ["a", "b"]
expected = MultiIndex.from_product([a, b])
a.setflags(write=False)
result = MultiIndex.from_product([a, b])
tm.assert_index_equal(result, expected)
def test_create_index_existing_name(idx):
# GH11193, when an existing index is passed, and a new name is not
# specified, the new index should inherit the previous object name
index = idx
index.names = ["foo", "bar"]
result = Index(index)
expected = Index(
Index(
[
("foo", "one"),
("foo", "two"),
("bar", "one"),
("baz", "two"),
("qux", "one"),
("qux", "two"),
],
dtype="object",
)
)
tm.assert_index_equal(result, expected)
result = Index(index, name="A")
expected = Index(
Index(
[
("foo", "one"),
("foo", "two"),
("bar", "one"),
("baz", "two"),
("qux", "one"),
("qux", "two"),
],
dtype="object",
),
name="A",
)
tm.assert_index_equal(result, expected)
# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
# GH 22420
df = pd.DataFrame(
[["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
)
expected = MultiIndex.from_tuples(
[("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
)
result = MultiIndex.from_frame(df)
tm.assert_index_equal(expected, result)
def test_from_frame_missing_values_multiIndex():
# GH 39984
pa = pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
"a": Series([1, 2, None], dtype="Int64"),
"b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
}
)
multi_indexed = MultiIndex.from_frame(df)
expected = MultiIndex.from_arrays(
[
Series([1, 2, None]).astype("Int64"),
pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
],
names=["a", "b"],
)
tm.assert_index_equal(multi_indexed, expected)
@pytest.mark.parametrize(
"non_frame",
[
Series([1, 2, 3, 4]),
[1, 2, 3, 4],
[[1, 2], [3, 4], [5, 6]],
Index([1, 2, 3, 4]),
np.array([[1, 2], [3, 4], [5, 6]]),
27,
],
)
def test_from_frame_error(non_frame):
# GH 22420
with pytest.raises(TypeError, match="Input must be a DataFrame"):
MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
# GH 22420
df = pd.DataFrame(
{
"dates": date_range("19910905", periods=6, tz="US/Eastern"),
"a": [1, 1, 1, 2, 2, 2],
"b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
"c": ["x", "x", "y", "z", "x", "y"],
}
)
original_dtypes = df.dtypes.to_dict()
expected_mi = MultiIndex.from_arrays(
[
date_range("19910905", periods=6, tz="US/Eastern"),
[1, 1, 1, 2, 2, 2],
pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
["x", "x", "y", "z", "x", "y"],
],
names=["dates", "a", "b", "c"],
)
mi = MultiIndex.from_frame(df)
mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
tm.assert_index_equal(expected_mi, mi)
assert original_dtypes == mi_dtypes
@pytest.mark.parametrize(
"names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
# GH 22420
df = pd.DataFrame(
[["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
)
mi = MultiIndex.from_frame(df, names=names_in)
assert mi.names == names_out
@pytest.mark.parametrize(
"names,expected_error_msg",
[
("bad_input", "Names should be list-like for a MultiIndex"),
(["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
],
)
def test_from_frame_invalid_names(names, expected_error_msg):
# GH 22420
df = pd.DataFrame(
[["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
)
with pytest.raises(ValueError, match=expected_error_msg):
MultiIndex.from_frame(df, names=names)
def test_index_equal_empty_iterable():
# #16844
a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
tm.assert_index_equal(a, b)
def test_raise_invalid_sortorder():
# Test that the MultiIndex constructor raise when a incorrect sortorder is given
# GH#28518
levels = [[0, 1], [0, 1, 2]]
# Correct sortorder
MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
)
with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
)
with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
)
def test_datetimeindex():
idx1 = pd.DatetimeIndex(
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
)
idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
idx = MultiIndex.from_arrays([idx1, idx2])
expected1 = pd.DatetimeIndex(
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
)
tm.assert_index_equal(idx.levels[0], expected1)
tm.assert_index_equal(idx.levels[1], idx2)
# from datetime combos
# GH 7888
date1 = np.datetime64("today")
date2 = datetime.today()
date3 = Timestamp.today()
for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
index = MultiIndex.from_product([[d1], [d2]])
assert isinstance(index.levels[0], pd.DatetimeIndex)
assert isinstance(index.levels[1], pd.DatetimeIndex)
# but NOT date objects, matching Index behavior
date4 = date.today()
index = MultiIndex.from_product([[date4], [date2]])
assert not isinstance(index.levels[0], pd.DatetimeIndex)
assert isinstance(index.levels[1], pd.DatetimeIndex)
def test_constructor_with_tz():
index = pd.DatetimeIndex(
["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
)
columns = pd.DatetimeIndex(
["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
)
result = MultiIndex.from_arrays([index, columns])
assert result.names == ["dt1", "dt2"]
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)
result = MultiIndex.from_arrays([Series(index), Series(columns)])
assert result.names == ["dt1", "dt2"]
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)
def test_multiindex_inference_consistency():
# check that inference behavior matches the base class
v = date.today()
arr = [v, v]
idx = Index(arr)
assert idx.dtype == object
mi = MultiIndex.from_arrays([arr])
lev = mi.levels[0]
assert lev.dtype == object
mi = MultiIndex.from_product([arr])
lev = mi.levels[0]
assert lev.dtype == object
mi = MultiIndex.from_tuples([(x,) for x in arr])
lev = mi.levels[0]
assert lev.dtype == object
def test_dtype_representation(using_infer_string):
# GH#46900
pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
result = pmidx.dtypes
exp = "object" if not using_infer_string else "string"
expected = Series(
["int64", exp],
index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
dtype=object,
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,164 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
)
import pandas._testing as tm
def test_to_numpy(idx):
result = idx.to_numpy()
exp = idx.values
tm.assert_numpy_array_equal(result, exp)
def test_to_frame():
tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False)
expected = DataFrame(tuples)
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
index = MultiIndex.from_tuples(tuples, names=["first", "second"])
result = index.to_frame(index=False)
expected = DataFrame(tuples)
expected.columns = ["first", "second"]
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
# See GH-22580
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False, name=["first", "second"])
expected = DataFrame(tuples)
expected.columns = ["first", "second"]
tm.assert_frame_equal(result, expected)
result = index.to_frame(name=["first", "second"])
expected.index = index
expected.columns = ["first", "second"]
tm.assert_frame_equal(result, expected)
msg = "'name' must be a list / sequence of column names."
with pytest.raises(TypeError, match=msg):
index.to_frame(name="first")
msg = "'name' should have same length as number of levels on index."
with pytest.raises(ValueError, match=msg):
index.to_frame(name=["first"])
# Tests for datetime index
index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
result = index.to_frame(index=False)
expected = DataFrame(
{
0: np.repeat(np.arange(5, dtype="int64"), 3),
1: np.tile(pd.date_range("20130101", periods=3), 5),
}
)
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
# See GH-22580
result = index.to_frame(index=False, name=["first", "second"])
expected = DataFrame(
{
"first": np.repeat(np.arange(5, dtype="int64"), 3),
"second": np.tile(pd.date_range("20130101", periods=3), 5),
}
)
tm.assert_frame_equal(result, expected)
result = index.to_frame(name=["first", "second"])
expected.index = index
tm.assert_frame_equal(result, expected)
def test_to_frame_dtype_fidelity():
# GH 22420
mi = MultiIndex.from_arrays(
[
pd.date_range("19910905", periods=6, tz="US/Eastern"),
[1, 1, 1, 2, 2, 2],
pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
["x", "x", "y", "z", "x", "y"],
],
names=["dates", "a", "b", "c"],
)
original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
expected_df = DataFrame(
{
"dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
"a": [1, 1, 1, 2, 2, 2],
"b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
"c": ["x", "x", "y", "z", "x", "y"],
}
)
df = mi.to_frame(index=False)
df_dtypes = df.dtypes.to_dict()
tm.assert_frame_equal(df, expected_df)
assert original_dtypes == df_dtypes
def test_to_frame_resulting_column_order():
# GH 22420
expected = ["z", 0, "a"]
mi = MultiIndex.from_arrays(
[["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
)
result = mi.to_frame().columns.tolist()
assert result == expected
def test_to_frame_duplicate_labels():
# GH 45245
data = [(1, 2), (3, 4)]
names = ["a", "a"]
index = MultiIndex.from_tuples(data, names=names)
with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
index.to_frame()
result = index.to_frame(allow_duplicates=True)
expected = DataFrame(data, index=index, columns=names)
tm.assert_frame_equal(result, expected)
names = [None, 0]
index = MultiIndex.from_tuples(data, names=names)
with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
index.to_frame()
result = index.to_frame(allow_duplicates=True)
expected = DataFrame(data, index=index, columns=[0, 0])
tm.assert_frame_equal(result, expected)
def test_to_flat_index(idx):
expected = pd.Index(
(
("foo", "one"),
("foo", "two"),
("bar", "one"),
("baz", "two"),
("qux", "one"),
("qux", "two"),
),
tupleize_cols=False,
)
result = idx.to_flat_index()
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,96 @@
from copy import (
copy,
deepcopy,
)
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def assert_multiindex_copied(copy, original):
# Levels should be (at least, shallow copied)
tm.assert_copy(copy.levels, original.levels)
tm.assert_almost_equal(copy.codes, original.codes)
# Labels doesn't matter which way copied
tm.assert_almost_equal(copy.codes, original.codes)
assert copy.codes is not original.codes
# Names doesn't matter which way copied
assert copy.names == original.names
assert copy.names is not original.names
# Sort order should be copied
assert copy.sortorder == original.sortorder
def test_copy(idx):
i_copy = idx.copy()
assert_multiindex_copied(i_copy, idx)
def test_shallow_copy(idx):
i_copy = idx._view()
assert_multiindex_copied(i_copy, idx)
def test_view(idx):
i_view = idx.view()
assert_multiindex_copied(i_view, idx)
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
idx = MultiIndex(
levels=[["foo", "bar"], ["fizz", "buzz"]],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=["first", "second"],
)
idx_copy = func(idx)
assert idx_copy is not idx
assert idx_copy.equals(idx)
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
idx = MultiIndex(
levels=[["foo", "bar"], ["fizz", "buzz"]],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=["first", "second"],
)
idx_copy = idx.copy(deep=deep)
assert idx_copy.equals(idx)
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
"kwarg, value",
[
("names", ["third", "fourth"]),
],
)
def test_copy_method_kwargs(deep, kwarg, value):
# gh-12309: Check that the "name" argument as well other kwargs are honored
idx = MultiIndex(
levels=[["foo", "bar"], ["fizz", "buzz"]],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=["first", "second"],
)
idx_copy = idx.copy(**{kwarg: value, "deep": deep})
assert getattr(idx_copy, kwarg) == value
def test_copy_deep_false_retains_id():
# GH#47878
idx = MultiIndex(
levels=[["foo", "bar"], ["fizz", "buzz"]],
codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
names=["first", "second"],
)
res = idx.copy(deep=False)
assert res._id is idx._id

View File

@ -0,0 +1,190 @@
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_drop(idx):
dropped = idx.drop([("foo", "two"), ("qux", "one")])
index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
dropped2 = idx.drop(index)
expected = idx[[0, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
tm.assert_index_equal(dropped2, expected)
dropped = idx.drop(["bar"])
expected = idx[[0, 1, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop("foo")
expected = idx[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
index = MultiIndex.from_tuples([("bar", "two")])
with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
idx.drop([("bar", "two")])
with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
idx.drop(index)
with pytest.raises(KeyError, match=r"^'two'$"):
idx.drop(["foo", "two"])
# partially correct argument
mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
idx.drop(mixed_index)
# error='ignore'
dropped = idx.drop(index, errors="ignore")
expected = idx[[0, 1, 2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop(mixed_index, errors="ignore")
expected = idx[[0, 1, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
dropped = idx.drop(["foo", "two"], errors="ignore")
expected = idx[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop
dropped = idx.drop(["foo", ("qux", "one")])
expected = idx[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop / error='ignore'
mixed_index = ["foo", ("qux", "one"), "two"]
with pytest.raises(KeyError, match=r"^'two'$"):
idx.drop(mixed_index)
dropped = idx.drop(mixed_index, errors="ignore")
expected = idx[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
def test_droplevel_with_names(idx):
index = idx[idx.get_loc("foo")]
dropped = index.droplevel(0)
assert dropped.name == "second"
index = MultiIndex(
levels=[Index(range(4)), Index(range(4)), Index(range(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
names=["one", "two", "three"],
)
dropped = index.droplevel(0)
assert dropped.names == ("two", "three")
dropped = index.droplevel("two")
expected = index.droplevel(1)
assert dropped.equals(expected)
def test_droplevel_list():
index = MultiIndex(
levels=[Index(range(4)), Index(range(4)), Index(range(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
names=["one", "two", "three"],
)
dropped = index[:2].droplevel(["three", "one"])
expected = index[:2].droplevel(2).droplevel(0)
assert dropped.equals(expected)
dropped = index[:2].droplevel([])
expected = index[:2]
assert dropped.equals(expected)
msg = (
"Cannot remove 3 levels from an index with 3 levels: "
"at least one level must be left"
)
with pytest.raises(ValueError, match=msg):
index[:2].droplevel(["one", "two", "three"])
with pytest.raises(KeyError, match="'Level four not found'"):
index[:2].droplevel(["one", "four"])
def test_drop_not_lexsorted():
# GH 12078
# define the lexsorted version of the multi-index
tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
assert lexsorted_mi._is_lexsorted()
# and the not-lexsorted version
df = pd.DataFrame(
columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
)
df = df.pivot_table(index="a", columns=["b", "c"], values="d")
df = df.reset_index()
not_lexsorted_mi = df.columns
assert not not_lexsorted_mi._is_lexsorted()
# compare the results
tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
def test_drop_with_nan_in_index(nulls_fixture):
# GH#18853
mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop(pd.Timestamp("2001"), level="date")
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
# GH#33494
mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
result = mi.drop((1, 2))
expected = MultiIndex.from_tuples([(2, 3)])
tm.assert_index_equal(result, expected)
def test_single_level_drop_partially_missing_elements():
# GH 37820
mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])
msg = r"labels \[4\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop(4, level=0)
with pytest.raises(KeyError, match=msg):
mi.drop([1, 4], level=0)
msg = r"labels \[nan\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop([np.nan], level=0)
with pytest.raises(KeyError, match=msg):
mi.drop([np.nan, 1, 2, 3], level=0)
mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
msg = r"labels \['a'\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop([np.nan, 1, "a"], level=0)
def test_droplevel_multiindex_one_level():
# GH#37208
index = MultiIndex.from_tuples([(2,)], names=("b",))
result = index.droplevel([])
expected = Index([2], name="b")
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,363 @@
from itertools import product
import numpy as np
import pytest
from pandas._libs import (
hashtable,
index as libindex,
)
from pandas import (
NA,
DatetimeIndex,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture
def idx_dup():
# compare tests/indexes/multi/conftest.py
major_axis = Index(["foo", "bar", "baz", "qux"])
minor_axis = Index(["one", "two"])
major_codes = np.array([0, 0, 1, 0, 1, 1])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ["first", "second"]
mi = MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=index_names,
verify_integrity=False,
)
return mi
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)
res = mi.unique()
exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
tm.assert_index_equal(res, exp)
mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names)
res = mi.unique()
exp = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names)
tm.assert_index_equal(res, exp)
mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names)
res = mi.unique()
exp = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names)
tm.assert_index_equal(res, exp)
# GH #20568 - empty MI
mi = MultiIndex.from_arrays([[], []], names=names)
res = mi.unique()
tm.assert_index_equal(mi, res)
def test_unique_datetimelike():
idx1 = DatetimeIndex(
["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
)
idx2 = DatetimeIndex(
["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
tz="Asia/Tokyo",
)
result = MultiIndex.from_arrays([idx1, idx2]).unique()
eidx1 = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
eidx2 = DatetimeIndex(
["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
)
exp = MultiIndex.from_arrays([eidx1, eidx2])
tm.assert_index_equal(result, exp)
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
# GH #17896 - with level= argument
result = idx.unique(level=level)
expected = idx.get_level_values(level).unique()
tm.assert_index_equal(result, expected)
# With already unique level
mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
# With empty MI
mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
def test_duplicate_multiindex_codes():
# GH 17464
# Make sure that a MultiIndex with duplicate levels throws a ValueError
msg = r"Level values must be unique: \[[A', ]+\] on level 0"
with pytest.raises(ValueError, match=msg):
mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])
# And that using set_levels with duplicate levels fails
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
with pytest.raises(ValueError, match=msg):
mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]])
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
# GH18872, GH19029
mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
assert mi.names == names
# With .rename()
mi = MultiIndex.from_product([[0, 1]] * 3)
mi = mi.rename(names)
assert mi.names == names
# With .rename(., level=)
mi.rename(names[1], level=1, inplace=True)
mi = mi.rename([names[0], names[2]], level=[0, 2])
assert mi.names == names
def test_duplicate_meta_data():
# GH 10115
mi = MultiIndex(
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
)
for idx in [
mi,
mi.set_names([None, None]),
mi.set_names([None, "Num"]),
mi.set_names(["Upper", "Num"]),
]:
assert idx.has_duplicates
assert idx.drop_duplicates().names == idx.names
def test_has_duplicates(idx, idx_dup):
# see fixtures
assert idx.is_unique is True
assert idx.has_duplicates is False
assert idx_dup.is_unique is False
assert idx_dup.has_duplicates is True
mi = MultiIndex(
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
)
assert mi.is_unique is False
assert mi.has_duplicates is True
# single instance of NaN
mi_nan = MultiIndex(
levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
)
assert mi_nan.is_unique is True
assert mi_nan.has_duplicates is False
# multiple instances of NaN
mi_nan_dup = MultiIndex(
levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
)
assert mi_nan_dup.is_unique is False
assert mi_nan_dup.has_duplicates is True
def test_has_duplicates_from_tuples():
# GH 9075
t = [
("x", "out", "z", 5, "y", "in", "z", 169),
("x", "out", "z", 7, "y", "in", "z", 119),
("x", "out", "z", 9, "y", "in", "z", 135),
("x", "out", "z", 13, "y", "in", "z", 145),
("x", "out", "z", 14, "y", "in", "z", 158),
("x", "out", "z", 16, "y", "in", "z", 122),
("x", "out", "z", 17, "y", "in", "z", 160),
("x", "out", "z", 18, "y", "in", "z", 180),
("x", "out", "z", 20, "y", "in", "z", 143),
("x", "out", "z", 21, "y", "in", "z", 128),
("x", "out", "z", 22, "y", "in", "z", 129),
("x", "out", "z", 25, "y", "in", "z", 111),
("x", "out", "z", 28, "y", "in", "z", 114),
("x", "out", "z", 29, "y", "in", "z", 121),
("x", "out", "z", 31, "y", "in", "z", 126),
("x", "out", "z", 32, "y", "in", "z", 155),
("x", "out", "z", 33, "y", "in", "z", 123),
("x", "out", "z", 12, "y", "in", "z", 144),
]
mi = MultiIndex.from_tuples(t)
assert not mi.has_duplicates
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
# handle int64 overflow if possible
# no overflow with 4
# overflow possible with 8
codes = np.tile(np.arange(500), 2)
level = np.arange(500)
if with_nulls: # inject some null values
codes[500] = -1 # common nan value
codes = [codes.copy() for i in range(nlevels)]
for i in range(nlevels):
codes[i][500 + i - nlevels // 2] = -1
codes += [np.array([-1, 1]).repeat(500)]
else:
codes = [codes] * nlevels + [np.arange(2).repeat(500)]
levels = [level] * nlevels + [[0, 1]]
# no dups
mi = MultiIndex(levels=levels, codes=codes)
assert not mi.has_duplicates
# with a dup
if with_nulls:
def f(a):
return np.insert(a, 1000, a[0])
codes = list(map(f, codes))
mi = MultiIndex(levels=levels, codes=codes)
else:
values = mi.values.tolist()
mi = MultiIndex.from_tuples(values + [values[0]])
assert mi.has_duplicates
@pytest.mark.parametrize(
"keep, expected",
[
("first", np.array([False, False, False, True, True, False])),
("last", np.array([False, True, True, False, False, False])),
(False, np.array([False, True, True, True, True, False])),
],
)
def test_duplicated(idx_dup, keep, expected):
result = idx_dup.duplicated(keep=keep)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.arm_slow
def test_duplicated_hashtable_impl(keep, monkeypatch):
# GH 9125
n, k = 6, 10
levels = [np.arange(n), [str(i) for i in range(n)], 1000 + np.arange(n)]
codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels]
with monkeypatch.context() as m:
m.setattr(libindex, "_SIZE_CUTOFF", 50)
mi = MultiIndex(levels=levels, codes=codes)
result = mi.duplicated(keep=keep)
expected = hashtable.duplicated(mi.values, keep=keep)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
# GH5873
mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
assert not mi.has_duplicates
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))
@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
# GH5873
# all possible unique combinations, including nan
codes = product(range(-1, n), range(-1, m))
mi = MultiIndex(
levels=[list("abcde")[:n], list("WXYZ")[:m]],
codes=np.random.default_rng(2).permutation(list(codes)).T,
)
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))
def test_duplicated_drop_duplicates():
# GH#4060
idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))
expected = np.array([False, False, False, True, False, False], dtype=bool)
duplicated = idx.duplicated()
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(), expected)
expected = np.array([True, False, False, False, False, False])
duplicated = idx.duplicated(keep="last")
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected)
expected = np.array([True, False, False, True, False, False])
duplicated = idx.duplicated(keep=False)
tm.assert_numpy_array_equal(duplicated, expected)
assert duplicated.dtype == bool
expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)
@pytest.mark.parametrize(
"dtype",
[
np.complex64,
np.complex128,
],
)
def test_duplicated_series_complex_numbers(dtype):
# GH 17927
expected = Series(
[False, False, False, True, False, False, False, True, False, True],
dtype=bool,
)
result = Series(
[
np.nan + np.nan * 1j,
0,
1j,
1j,
1,
1 + 1j,
1 + 2j,
1 + 1j,
np.nan,
np.nan + np.nan * 1j,
],
dtype=dtype,
).duplicated()
tm.assert_series_equal(result, expected)
def test_midx_unique_ea_dtype():
# GH#48335
vals_a = Series([1, 2, NA, NA], dtype="Int64")
vals_b = np.array([1, 2, 3, 3])
midx = MultiIndex.from_arrays([vals_a, vals_b], names=["a", "b"])
result = midx.unique()
exp_vals_a = Series([1, 2, NA], dtype="Int64")
exp_vals_b = np.array([1, 2, 3])
expected = MultiIndex.from_arrays([exp_vals_a, exp_vals_b], names=["a", "b"])
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,284 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_any_real_numeric_dtype
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
def test_equals(idx):
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert idx.equals(idx.to_flat_index())
assert idx.equals(idx.to_flat_index().astype("category"))
assert not idx.equals(list(idx))
assert not idx.equals(np.array(idx))
same_values = Index(idx, dtype=object)
assert idx.equals(same_values)
assert same_values.equals(idx)
if idx.nlevels == 1:
# do not test MultiIndex
assert not idx.equals(Series(idx))
def test_equals_op(idx):
# GH9947, GH10637
index_a = idx
n = len(index_a)
index_b = index_a[0:-1]
index_c = index_a[0:-1].append(index_a[-2:-1])
index_d = index_a[0:1]
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_b
expected1 = np.array([True] * n)
expected2 = np.array([True] * (n - 1) + [False])
tm.assert_numpy_array_equal(index_a == index_a, expected1)
tm.assert_numpy_array_equal(index_a == index_c, expected2)
# test comparisons with numpy arrays
array_a = np.array(index_a)
array_b = np.array(index_a[0:-1])
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
array_d = np.array(index_a[0:1])
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_b
tm.assert_numpy_array_equal(index_a == array_a, expected1)
tm.assert_numpy_array_equal(index_a == array_c, expected2)
# test comparisons with Series
series_a = Series(array_a)
series_b = Series(array_b)
series_c = Series(array_c)
series_d = Series(array_d)
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_b
tm.assert_numpy_array_equal(index_a == series_a, expected1)
tm.assert_numpy_array_equal(index_a == series_c, expected2)
# cases where length is 1 for one of them
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_d
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
series_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
series_a == array_d
# comparing with a scalar should broadcast; note that we are excluding
# MultiIndex because in this case each item in the index is a tuple of
# length 2, and therefore is considered an array of length 2 in the
# comparison instead of a scalar
if not isinstance(index_a, MultiIndex):
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
# assuming the 2nd to last item is unique in the data
item = index_a[-2]
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))
def test_compare_tuple():
# GH#21517
mi = MultiIndex.from_product([[1, 2]] * 2)
all_false = np.array([False, False, False, False])
result = mi == mi[0]
expected = np.array([True, False, False, False])
tm.assert_numpy_array_equal(result, expected)
result = mi != mi[0]
tm.assert_numpy_array_equal(result, ~expected)
result = mi < mi[0]
tm.assert_numpy_array_equal(result, all_false)
result = mi <= mi[0]
tm.assert_numpy_array_equal(result, expected)
result = mi > mi[0]
tm.assert_numpy_array_equal(result, ~expected)
result = mi >= mi[0]
tm.assert_numpy_array_equal(result, ~all_false)
def test_compare_tuple_strs():
# GH#34180
mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])
result = mi == ("c", "a")
expected = np.array([False, False, True])
tm.assert_numpy_array_equal(result, expected)
result = mi == ("c",)
expected = np.array([False, False, False])
tm.assert_numpy_array_equal(result, expected)
def test_equals_multi(idx):
assert idx.equals(idx)
assert not idx.equals(idx.values)
assert idx.equals(Index(idx.values))
assert idx.equal_levels(idx)
assert not idx.equals(idx[:-1])
assert not idx.equals(idx[-1])
# different number of levels
index = MultiIndex(
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
assert not index.equals(index2)
assert not index.equal_levels(index2)
# levels are different
major_axis = Index(list(range(4)))
minor_axis = Index(list(range(2)))
major_codes = np.array([0, 0, 1, 2, 2, 3])
minor_codes = np.array([0, 1, 0, 0, 1, 0])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert not idx.equals(index)
assert not idx.equal_levels(index)
# some of the labels are different
major_axis = Index(["foo", "bar", "baz", "qux"])
minor_axis = Index(["one", "two"])
major_codes = np.array([0, 0, 2, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert not idx.equals(index)
def test_identical(idx):
mi = idx.copy()
mi2 = idx.copy()
assert mi.identical(mi2)
mi = mi.set_names(["new1", "new2"])
assert mi.equals(mi2)
assert not mi.identical(mi2)
mi2 = mi2.set_names(["new1", "new2"])
assert mi.identical(mi2)
mi4 = Index(mi.tolist(), tupleize_cols=False)
assert not mi.identical(mi4)
assert mi.equals(mi4)
def test_equals_operator(idx):
# GH9785
assert (idx == idx).all()
def test_equals_missing_values():
# make sure take is not using -1
i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
result = i[0:1].equals(i[0])
assert not result
result = i[1:2].equals(i[1])
assert not result
def test_equals_missing_values_differently_sorted():
# GH#38439
mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
assert not mi1.equals(mi2)
mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
assert mi1.equals(mi2)
def test_is_():
mi = MultiIndex.from_tuples(zip(range(10), range(10)))
assert mi.is_(mi)
assert mi.is_(mi.view())
assert mi.is_(mi.view().view().view().view())
mi2 = mi.view()
# names are metadata, they don't change id
mi2.names = ["A", "B"]
assert mi2.is_(mi)
assert mi.is_(mi2)
assert not mi.is_(mi.set_names(["C", "D"]))
# levels are inherent properties, they change identity
mi3 = mi2.set_levels([list(range(10)), list(range(10))])
assert not mi3.is_(mi2)
# shouldn't change
assert mi2.is_(mi)
mi4 = mi3.view()
# GH 17464 - Remove duplicate MultiIndex levels
mi4 = mi4.set_levels([list(range(10)), list(range(10))])
assert not mi4.is_(mi3)
mi5 = mi.view()
mi5 = mi5.set_levels(mi5.levels)
assert not mi5.is_(mi)
def test_is_all_dates(idx):
assert not idx._is_all_dates
def test_is_numeric(idx):
# MultiIndex is never numeric
assert not is_any_real_numeric_dtype(idx)
def test_multiindex_compare():
# GH 21149
# Ensure comparison operations for MultiIndex with nlevels == 1
# behave consistently with those for MultiIndex with nlevels > 1
midx = MultiIndex.from_product([[0, 1]])
# Equality self-test: MultiIndex object vs self
expected = Series([True, True])
result = Series(midx == midx)
tm.assert_series_equal(result, expected)
# Greater than comparison: MultiIndex object vs self
expected = Series([False, False])
result = Series(midx > midx)
tm.assert_series_equal(result, expected)
def test_equals_ea_int_regular_int():
# GH#46026
mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]])
assert not mi1.equals(mi2)
assert not mi2.equals(mi1)

View File

@ -0,0 +1,249 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_format(idx):
msg = "MultiIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
idx.format()
idx[:0].format()
def test_format_integer_names():
index = MultiIndex(
levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
)
msg = "MultiIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
index.format(names=True)
def test_format_sparse_config(idx):
# GH1538
msg = "MultiIndex.format is deprecated"
with pd.option_context("display.multi_sparse", False):
with tm.assert_produces_warning(FutureWarning, match=msg):
result = idx.format()
assert result[1] == "foo two"
def test_format_sparse_display():
index = MultiIndex(
levels=[[0, 1], [0, 1], [0, 1], [0]],
codes=[
[0, 0, 0, 1, 1, 1],
[0, 0, 1, 0, 0, 1],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0],
],
)
msg = "MultiIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = index.format()
assert result[3] == "1 0 0 0"
def test_repr_with_unicode_data():
with pd.option_context("display.encoding", "UTF-8"):
d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
index = pd.DataFrame(d).set_index(["a", "b"]).index
assert "\\" not in repr(index) # we don't want unicode-escaped
def test_repr_roundtrip_raises():
mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
msg = "Must pass both levels and codes"
with pytest.raises(TypeError, match=msg):
eval(repr(mi))
def test_unicode_string_with_unicode():
d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
idx = pd.DataFrame(d).set_index(["a", "b"]).index
str(idx)
def test_repr_max_seq_item_setting(idx):
# GH10182
idx = idx.repeat(50)
with pd.option_context("display.max_seq_items", None):
repr(idx)
assert "..." not in str(idx)
class TestRepr:
def test_unicode_repr_issues(self):
levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
index = MultiIndex(levels=levels, codes=codes)
repr(index.levels)
repr(index.get_level_values(1))
def test_repr_max_seq_items_equal_to_n(self, idx):
# display.max_seq_items == n
with pd.option_context("display.max_seq_items", 6):
result = idx.__repr__()
expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
assert result == expected
def test_repr(self, idx):
result = idx[:1].__repr__()
expected = """\
MultiIndex([('foo', 'one')],
names=['first', 'second'])"""
assert result == expected
result = idx.__repr__()
expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
assert result == expected
with pd.option_context("display.max_seq_items", 5):
result = idx.__repr__()
expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
...
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'], length=6)"""
assert result == expected
# display.max_seq_items == 1
with pd.option_context("display.max_seq_items", 1):
result = idx.__repr__()
expected = """\
MultiIndex([...
('qux', 'two')],
names=['first', ...], length=6)"""
assert result == expected
def test_rjust(self):
n = 1000
ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
result = mi[:1].__repr__()
expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
names=['a', 'b', 'dti'])"""
assert result == expected
result = mi[::500].__repr__()
expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:08:20'),
('abc', 10, '2000-01-01 00:16:40'),
('abc', 10, '2000-01-01 00:25:00')],
names=['a', 'b', 'dti'])"""
assert result == expected
result = mi.__repr__()
expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:00:01'),
( 'a', 9, '2000-01-01 00:00:02'),
( 'a', 9, '2000-01-01 00:00:03'),
( 'a', 9, '2000-01-01 00:00:04'),
( 'a', 9, '2000-01-01 00:00:05'),
( 'a', 9, '2000-01-01 00:00:06'),
( 'a', 9, '2000-01-01 00:00:07'),
( 'a', 9, '2000-01-01 00:00:08'),
( 'a', 9, '2000-01-01 00:00:09'),
...
('abc', 10, '2000-01-01 00:33:10'),
('abc', 10, '2000-01-01 00:33:11'),
('abc', 10, '2000-01-01 00:33:12'),
('abc', 10, '2000-01-01 00:33:13'),
('abc', 10, '2000-01-01 00:33:14'),
('abc', 10, '2000-01-01 00:33:15'),
('abc', 10, '2000-01-01 00:33:16'),
('abc', 10, '2000-01-01 00:33:17'),
('abc', 10, '2000-01-01 00:33:18'),
('abc', 10, '2000-01-01 00:33:19')],
names=['a', 'b', 'dti'], length=2000)"""
assert result == expected
def test_tuple_width(self):
n = 1000
ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
levels = [ci, ci.codes + 9, dti, dti, dti]
names = ["a", "b", "dti_1", "dti_2", "dti_3"]
mi = MultiIndex.from_arrays(levels, names=names)
result = mi[:1].__repr__()
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501
assert result == expected
result = mi[:10].__repr__()
expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
assert result == expected
result = mi.__repr__()
expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
...
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
assert result == expected
def test_multiindex_long_element(self):
# Non-regression test towards GH#52960
data = MultiIndex.from_tuples([("c" * 62,)])
expected = (
"MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
"cccccccccccccccccccccc',)],\n )"
)
assert str(data) == expected

View File

@ -0,0 +1,124 @@
import numpy as np
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
MultiIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestGetLevelValues:
def test_get_level_values_box_datetime64(self):
dates = date_range("1/1/2000", periods=4)
levels = [dates, [0, 1]]
codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]
index = MultiIndex(levels=levels, codes=codes)
assert isinstance(index.get_level_values(0)[0], Timestamp)
def test_get_level_values(idx):
result = idx.get_level_values(0)
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
tm.assert_index_equal(result, expected)
assert result.name == "first"
result = idx.get_level_values("first")
expected = idx.get_level_values(0)
tm.assert_index_equal(result, expected)
# GH 10460
index = MultiIndex(
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
)
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
tm.assert_index_equal(index.get_level_values(0), exp)
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
tm.assert_index_equal(index.get_level_values(1), exp)
def test_get_level_values_all_na():
# GH#17924 when level entirely consists of nan
arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
index = MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = Index([np.nan, np.nan, np.nan], dtype=np.float64)
tm.assert_index_equal(result, expected)
result = index.get_level_values(1)
expected = Index(["a", np.nan, 1], dtype=object)
tm.assert_index_equal(result, expected)
def test_get_level_values_int_with_na():
# GH#17924
arrays = [["a", "b", "b"], [1, np.nan, 2]]
index = MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = Index([1, np.nan, 2])
tm.assert_index_equal(result, expected)
arrays = [["a", "b", "b"], [np.nan, np.nan, 2]]
index = MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = Index([np.nan, np.nan, 2])
tm.assert_index_equal(result, expected)
def test_get_level_values_na():
arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
index = MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = Index([np.nan, np.nan, np.nan])
tm.assert_index_equal(result, expected)
result = index.get_level_values(1)
expected = Index(["a", np.nan, 1])
tm.assert_index_equal(result, expected)
arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
index = MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = pd.DatetimeIndex([0, 1, pd.NaT])
tm.assert_index_equal(result, expected)
arrays = [[], []]
index = MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = Index([], dtype=object)
tm.assert_index_equal(result, expected)
def test_get_level_values_when_periods():
# GH33131. See also discussion in GH32669.
# This test can probably be removed when PeriodIndex._engine is removed.
from pandas import (
Period,
PeriodIndex,
)
idx = MultiIndex.from_arrays(
[PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")]
)
idx2 = MultiIndex.from_arrays(
[idx._get_level_values(level) for level in range(idx.nlevels)]
)
assert all(x.is_monotonic_increasing for x in idx2.levels)
def test_values_loses_freq_of_underlying_index():
# GH#49054
idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME"))
expected = idx.copy(deep=True)
idx2 = Index([1, 2, 3])
midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]])
midx.values
assert idx.freq is not None
tm.assert_index_equal(idx, expected)

View File

@ -0,0 +1,384 @@
import numpy as np
import pytest
from pandas.compat import PY311
from pandas.core.dtypes.dtypes import DatetimeTZDtype
import pandas as pd
from pandas import (
CategoricalIndex,
MultiIndex,
)
import pandas._testing as tm
def assert_matching(actual, expected, check_dtype=False):
# avoid specifying internal representation
# as much as possible
assert len(actual) == len(expected)
for act, exp in zip(actual, expected):
act = np.asarray(act)
exp = np.asarray(exp)
tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
def test_get_level_number_integer(idx):
idx.names = [1, 0]
assert idx._get_level_number(1) == 0
assert idx._get_level_number(0) == 1
msg = "Too many levels: Index has only 2 levels, not 3"
with pytest.raises(IndexError, match=msg):
idx._get_level_number(2)
with pytest.raises(KeyError, match="Level fourth not found"):
idx._get_level_number("fourth")
def test_get_dtypes(using_infer_string):
# Test MultiIndex.dtypes (# Gh37062)
idx_multitype = MultiIndex.from_product(
[[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
names=["int", "string", "dt"],
)
exp = "object" if not using_infer_string else "string"
expected = pd.Series(
{
"int": np.dtype("int64"),
"string": exp,
"dt": DatetimeTZDtype(tz="utc"),
}
)
tm.assert_series_equal(expected, idx_multitype.dtypes)
def test_get_dtypes_no_level_name(using_infer_string):
# Test MultiIndex.dtypes (# GH38580 )
idx_multitype = MultiIndex.from_product(
[
[1, 2, 3],
["a", "b", "c"],
pd.date_range("20200101", periods=2, tz="UTC"),
],
)
exp = "object" if not using_infer_string else "string"
expected = pd.Series(
{
"level_0": np.dtype("int64"),
"level_1": exp,
"level_2": DatetimeTZDtype(tz="utc"),
}
)
tm.assert_series_equal(expected, idx_multitype.dtypes)
def test_get_dtypes_duplicate_level_names(using_infer_string):
# Test MultiIndex.dtypes with non-unique level names (# GH45174)
result = MultiIndex.from_product(
[
[1, 2, 3],
["a", "b", "c"],
pd.date_range("20200101", periods=2, tz="UTC"),
],
names=["A", "A", "A"],
).dtypes
exp = "object" if not using_infer_string else "string"
expected = pd.Series(
[np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
index=["A", "A", "A"],
)
tm.assert_series_equal(result, expected)
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
with pytest.raises(IndexError, match="Too many levels"):
frame.index._get_level_number(2)
with pytest.raises(IndexError, match="not a valid level number"):
frame.index._get_level_number(-3)
def test_set_name_methods(idx):
# so long as these are synonyms, we don't need to test set_names
index_names = ["first", "second"]
assert idx.rename == idx.set_names
new_names = [name + "SUFFIX" for name in index_names]
ind = idx.set_names(new_names)
assert idx.names == index_names
assert ind.names == new_names
msg = "Length of names must match number of levels in MultiIndex"
with pytest.raises(ValueError, match=msg):
ind.set_names(new_names + new_names)
new_names2 = [name + "SUFFIX2" for name in new_names]
res = ind.set_names(new_names2, inplace=True)
assert res is None
assert ind.names == new_names2
# set names for specific level (# GH7792)
ind = idx.set_names(new_names[0], level=0)
assert idx.names == index_names
assert ind.names == [new_names[0], index_names[1]]
res = ind.set_names(new_names2[0], level=0, inplace=True)
assert res is None
assert ind.names == [new_names2[0], index_names[1]]
# set names for multiple levels
ind = idx.set_names(new_names, level=[0, 1])
assert idx.names == index_names
assert ind.names == new_names
res = ind.set_names(new_names2, level=[0, 1], inplace=True)
assert res is None
assert ind.names == new_names2
def test_set_levels_codes_directly(idx):
# setting levels/codes directly raises AttributeError
levels = idx.levels
new_levels = [[lev + "a" for lev in level] for level in levels]
codes = idx.codes
major_codes, minor_codes = codes
major_codes = [(x + 1) % 3 for x in major_codes]
minor_codes = [(x + 1) % 1 for x in minor_codes]
new_codes = [major_codes, minor_codes]
msg = "Can't set attribute"
with pytest.raises(AttributeError, match=msg):
idx.levels = new_levels
msg = (
"property 'codes' of 'MultiIndex' object has no setter"
if PY311
else "can't set attribute"
)
with pytest.raises(AttributeError, match=msg):
idx.codes = new_codes
def test_set_levels(idx):
# side note - you probably wouldn't want to use levels and codes
# directly like this - but it is possible.
levels = idx.levels
new_levels = [[lev + "a" for lev in level] for level in levels]
# level changing [w/o mutation]
ind2 = idx.set_levels(new_levels)
assert_matching(ind2.levels, new_levels)
assert_matching(idx.levels, levels)
# level changing specific level [w/o mutation]
ind2 = idx.set_levels(new_levels[0], level=0)
assert_matching(ind2.levels, [new_levels[0], levels[1]])
assert_matching(idx.levels, levels)
ind2 = idx.set_levels(new_levels[1], level=1)
assert_matching(ind2.levels, [levels[0], new_levels[1]])
assert_matching(idx.levels, levels)
# level changing multiple levels [w/o mutation]
ind2 = idx.set_levels(new_levels, level=[0, 1])
assert_matching(ind2.levels, new_levels)
assert_matching(idx.levels, levels)
# illegal level changing should not change levels
# GH 13754
original_index = idx.copy()
with pytest.raises(ValueError, match="^On"):
idx.set_levels(["c"], level=0)
assert_matching(idx.levels, original_index.levels, check_dtype=True)
with pytest.raises(ValueError, match="^On"):
idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
assert_matching(idx.codes, original_index.codes, check_dtype=True)
with pytest.raises(TypeError, match="^Levels"):
idx.set_levels("c", level=0)
assert_matching(idx.levels, original_index.levels, check_dtype=True)
with pytest.raises(TypeError, match="^Codes"):
idx.set_codes(1, level=0)
assert_matching(idx.codes, original_index.codes, check_dtype=True)
def test_set_codes(idx):
# side note - you probably wouldn't want to use levels and codes
# directly like this - but it is possible.
codes = idx.codes
major_codes, minor_codes = codes
major_codes = [(x + 1) % 3 for x in major_codes]
minor_codes = [(x + 1) % 1 for x in minor_codes]
new_codes = [major_codes, minor_codes]
# changing codes w/o mutation
ind2 = idx.set_codes(new_codes)
assert_matching(ind2.codes, new_codes)
assert_matching(idx.codes, codes)
# codes changing specific level w/o mutation
ind2 = idx.set_codes(new_codes[0], level=0)
assert_matching(ind2.codes, [new_codes[0], codes[1]])
assert_matching(idx.codes, codes)
ind2 = idx.set_codes(new_codes[1], level=1)
assert_matching(ind2.codes, [codes[0], new_codes[1]])
assert_matching(idx.codes, codes)
# codes changing multiple levels w/o mutation
ind2 = idx.set_codes(new_codes, level=[0, 1])
assert_matching(ind2.codes, new_codes)
assert_matching(idx.codes, codes)
# label changing for levels of different magnitude of categories
ind = MultiIndex.from_tuples([(0, i) for i in range(130)])
new_codes = range(129, -1, -1)
expected = MultiIndex.from_tuples([(0, i) for i in new_codes])
# [w/o mutation]
result = ind.set_codes(codes=new_codes, level=1)
assert result.equals(expected)
def test_set_levels_codes_names_bad_input(idx):
levels, codes = idx.levels, idx.codes
names = idx.names
with pytest.raises(ValueError, match="Length of levels"):
idx.set_levels([levels[0]])
with pytest.raises(ValueError, match="Length of codes"):
idx.set_codes([codes[0]])
with pytest.raises(ValueError, match="Length of names"):
idx.set_names([names[0]])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_levels(levels[0])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_codes(codes[0])
# shouldn't scalar data error, instead should demand list-like
with pytest.raises(TypeError, match="list-like"):
idx.set_names(names[0])
# should have equal lengths
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_levels(levels[0], level=[0, 1])
with pytest.raises(TypeError, match="list-like"):
idx.set_levels(levels, level=0)
# should have equal lengths
with pytest.raises(TypeError, match="list of lists-like"):
idx.set_codes(codes[0], level=[0, 1])
with pytest.raises(TypeError, match="list-like"):
idx.set_codes(codes, level=0)
# should have equal lengths
with pytest.raises(ValueError, match="Length of names"):
idx.set_names(names[0], level=[0, 1])
with pytest.raises(TypeError, match="Names must be a"):
idx.set_names(names, level=0)
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
# GH 21149
# Ensure that .set_names for MultiIndex with
# nlevels == 1 does not raise any errors
expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
m = MultiIndex.from_product([[0, 1]])
result = m.set_names("first", level=0, inplace=inplace)
if inplace:
result = m
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
# GH13854
index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
cidx = CategoricalIndex(list("bac"), ordered=ordered)
result = index.set_levels(cidx, level=0)
expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes)
tm.assert_index_equal(result, expected)
result_lvl = result.get_level_values(0)
expected_lvl = CategoricalIndex(
list("bacb"), categories=cidx.categories, ordered=cidx.ordered
)
tm.assert_index_equal(result_lvl, expected_lvl)
def test_set_value_keeps_names():
# motivating example from #3742
lev1 = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
lev2 = ["1", "2", "3"] * 2
idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"])
df = pd.DataFrame(
np.random.default_rng(2).standard_normal((6, 4)),
columns=["one", "two", "three", "four"],
index=idx,
)
df = df.sort_index()
assert df._is_copy is None
assert df.index.names == ("Name", "Number")
df.at[("grethe", "4"), "one"] = 99.34
assert df._is_copy is None
assert df.index.names == ("Name", "Number")
def test_set_levels_with_iterable():
# GH23273
sizes = [1, 2, 3]
colors = ["black"] * 3
index = MultiIndex.from_arrays([sizes, colors], names=["size", "color"])
result = index.set_levels(map(int, ["3", "2", "1"]), level="size")
expected_sizes = [3, 2, 1]
expected = MultiIndex.from_arrays([expected_sizes, colors], names=["size", "color"])
tm.assert_index_equal(result, expected)
def test_set_empty_level():
# GH#48636
midx = MultiIndex.from_arrays([[]], names=["A"])
result = midx.set_levels(pd.DatetimeIndex([]), level=0)
expected = MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"])
tm.assert_index_equal(result, expected)
def test_set_levels_pos_args_removal():
# https://github.com/pandas-dev/pandas/issues/41485
idx = MultiIndex.from_tuples(
[
(1, "one"),
(3, "one"),
],
names=["foo", "bar"],
)
with pytest.raises(TypeError, match="positional arguments"):
idx.set_levels(["a", "b", "c"], 0)
with pytest.raises(TypeError, match="positional arguments"):
idx.set_codes([[0, 1], [1, 0]], 0)
def test_set_levels_categorical_keep_dtype():
# GH#52125
midx = MultiIndex.from_arrays([[5, 6]])
result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0)
expected = MultiIndex.from_arrays([pd.Categorical([1, 2])])
tm.assert_index_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,289 @@
import re
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import (
Index,
IntervalIndex,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm
def test_labels_dtypes():
# GH 8456
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
assert i.codes[0].dtype == "int8"
assert i.codes[1].dtype == "int8"
i = MultiIndex.from_product([["a"], range(40)])
assert i.codes[1].dtype == "int8"
i = MultiIndex.from_product([["a"], range(400)])
assert i.codes[1].dtype == "int16"
i = MultiIndex.from_product([["a"], range(40000)])
assert i.codes[1].dtype == "int32"
i = MultiIndex.from_product([["a"], range(1000)])
assert (i.codes[0] >= 0).all()
assert (i.codes[1] >= 0).all()
def test_values_boxed():
tuples = [
(1, pd.Timestamp("2000-01-01")),
(2, pd.NaT),
(3, pd.Timestamp("2000-01-03")),
(1, pd.Timestamp("2000-01-04")),
(2, pd.Timestamp("2000-01-02")),
(3, pd.Timestamp("2000-01-03")),
]
result = MultiIndex.from_tuples(tuples)
expected = construct_1d_object_array_from_listlike(tuples)
tm.assert_numpy_array_equal(result.values, expected)
# Check that code branches for boxed values produce identical results
tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
def test_values_multiindex_datetimeindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(10**18, 10**18 + 5)
naive = pd.DatetimeIndex(ints)
aware = pd.DatetimeIndex(ints, tz="US/Central")
idx = MultiIndex.from_arrays([naive, aware])
result = idx.values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive)
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware)
# n_lev > n_lab
result = idx[:2].values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive[:2])
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware[:2])
def test_values_multiindex_periodindex():
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(2007, 2012)
pidx = pd.PeriodIndex(ints, freq="D")
idx = MultiIndex.from_arrays([ints, pidx])
result = idx.values
outer = Index([x[0] for x in result])
tm.assert_index_equal(outer, Index(ints, dtype=np.int64))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx)
# n_lev > n_lab
result = idx[:2].values
outer = Index([x[0] for x in result])
tm.assert_index_equal(outer, Index(ints[:2], dtype=np.int64))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx[:2])
def test_consistency():
# need to construct an overflow
major_axis = list(range(70000))
minor_axis = list(range(10))
major_codes = np.arange(70000)
minor_codes = np.repeat(range(10), 7000)
# the fact that is works means it's consistent
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
# inconsistent
major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert index.is_unique is False
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
# non-smoke test that we don't get hash collisions
size_cutoff = 50
with monkeypatch.context() as m:
m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
index = MultiIndex.from_product(
[np.arange(8), np.arange(8)], names=["one", "two"]
)
result = index.get_indexer(index.values)
tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))
for i in [0, 1, len(index) - 2, len(index) - 1]:
result = index.get_loc(index[i])
assert result == i
def test_dims():
pass
def test_take_invalid_kwargs():
vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
idx = MultiIndex.from_product(vals, names=["str", "dt"])
indices = [1, 2]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode="clip")
def test_isna_behavior(idx):
# should not segfault GH5123
# NOTE: if MI representation changes, may make sense to allow
# isna(MI)
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
pd.isna(idx)
def test_large_multiindex_error(monkeypatch):
# GH12527
size_cutoff = 50
with monkeypatch.context() as m:
m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
df_below_cutoff = pd.DataFrame(
1,
index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]),
columns=["dest"],
)
with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
df_below_cutoff.loc[(-1, 0), "dest"]
with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
df_below_cutoff.loc[(3, 0), "dest"]
df_above_cutoff = pd.DataFrame(
1,
index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]),
columns=["dest"],
)
with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
df_above_cutoff.loc[(-1, 0), "dest"]
with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
df_above_cutoff.loc[(3, 0), "dest"]
def test_mi_hashtable_populated_attribute_error(monkeypatch):
# GH 18165
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
r = range(50)
df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r]))
msg = "'Series' object has no attribute 'foo'"
with pytest.raises(AttributeError, match=msg):
df["a"].foo()
def test_can_hold_identifiers(idx):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_metadata_immutable(idx):
levels, codes = idx.levels, idx.codes
# shouldn't be able to set at either the top level or base level
mutable_regex = re.compile("does not support mutable operations")
with pytest.raises(TypeError, match=mutable_regex):
levels[0] = levels[0]
with pytest.raises(TypeError, match=mutable_regex):
levels[0][0] = levels[0][0]
# ditto for labels
with pytest.raises(TypeError, match=mutable_regex):
codes[0] = codes[0]
with pytest.raises(ValueError, match="assignment destination is read-only"):
codes[0][0] = codes[0][0]
# and for names
names = idx.names
with pytest.raises(TypeError, match=mutable_regex):
names[0] = names[0]
def test_level_setting_resets_attributes():
ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert ind.is_monotonic_increasing
ind = ind.set_levels([["A", "B"], [1, 3, 2]])
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic_increasing
def test_rangeindex_fallback_coercion_bug():
# GH 12893
df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
df = pd.concat(
{"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
axis=1,
)
df.index.names = ["fizz", "buzz"]
expected = pd.DataFrame(
{"df2": np.arange(100), "df1": np.arange(100)},
index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
)
tm.assert_frame_equal(df, expected, check_like=True)
result = df.index.get_level_values("fizz")
expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
tm.assert_index_equal(result, expected)
result = df.index.get_level_values("buzz")
expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
tm.assert_index_equal(result, expected)
def test_memory_usage(idx):
result = idx.memory_usage()
if len(idx):
idx.get_loc(idx[0])
result2 = idx.memory_usage()
result3 = idx.memory_usage(deep=True)
# RangeIndex, IntervalIndex
# don't have engines
if not isinstance(idx, (RangeIndex, IntervalIndex)):
assert result2 > result
if idx.inferred_type == "object":
assert result3 > result2
else:
# we report 0 for no-length
assert result == 0
def test_nlevels(idx):
assert idx.nlevels == 2

View File

@ -0,0 +1,103 @@
import numpy as np
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def test_isin_nan():
idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, True]))
tm.assert_numpy_array_equal(
idx.isin([("bar", float("nan"))]), np.array([False, True])
)
def test_isin_missing(nulls_fixture):
# GH48905
mi1 = MultiIndex.from_tuples([(1, nulls_fixture)])
mi2 = MultiIndex.from_tuples([(1, 1), (1, 2)])
result = mi2.isin(mi1)
expected = np.array([False, False])
tm.assert_numpy_array_equal(result, expected)
def test_isin():
values = [("foo", 2), ("bar", 3), ("quux", 4)]
idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
result = idx.isin(values)
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(result, expected)
# empty, return dtype bool
idx = MultiIndex.from_arrays([[], []])
result = idx.isin(values)
assert len(result) == 0
assert result.dtype == np.bool_
def test_isin_level_kwarg():
idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
vals_0 = ["foo", "bar", "quux"]
vals_1 = [2, 3, 10]
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))
msg = "Too many levels: Index has only 2 levels, not 6"
with pytest.raises(IndexError, match=msg):
idx.isin(vals_0, level=5)
msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number"
with pytest.raises(IndexError, match=msg):
idx.isin(vals_0, level=-5)
with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
idx.isin(vals_0, level=1.0)
with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
idx.isin(vals_1, level=-1.0)
with pytest.raises(KeyError, match="'Level A not found'"):
idx.isin(vals_1, level="A")
idx.names = ["A", "B"]
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))
with pytest.raises(KeyError, match="'Level C not found'"):
idx.isin(vals_1, level="C")
@pytest.mark.parametrize(
"labels,expected,level",
[
([("b", np.nan)], np.array([False, False, True]), None),
([np.nan, "a"], np.array([True, True, False]), 0),
(["d", np.nan], np.array([False, True, True]), 1),
],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
# GH 19132
midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]])
result = midx.isin(labels, level=level)
tm.assert_numpy_array_equal(result, expected)
def test_isin_empty():
# GH#51599
midx = MultiIndex.from_arrays([[1, 2], [3, 4]])
result = midx.isin([])
expected = np.array([False, False])
tm.assert_numpy_array_equal(result, expected)
def test_isin_generator():
# GH#52568
midx = MultiIndex.from_tuples([(1, 2)])
result = midx.isin(x for x in [(1, 2)])
expected = np.array([True])
tm.assert_numpy_array_equal(result, expected)

View File

@ -0,0 +1,268 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
Interval,
MultiIndex,
Series,
StringDtype,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
join_index, lidx, ridx = other.join(
idx, how=join_type, level="second", return_indexers=True
)
exp_level = other.join(idx.levels[1], how=join_type)
assert join_index.levels[0].equals(idx.levels[0])
assert join_index.levels[1].equals(exp_level)
# pare down levels
mask = np.array([x[1] in exp_level for x in idx], dtype=bool)
exp_values = idx.values[mask]
tm.assert_numpy_array_equal(join_index.values, exp_values)
if join_type in ("outer", "inner"):
join_index2, ridx2, lidx2 = idx.join(
other, how=join_type, level="second", return_indexers=True
)
assert join_index.equals(join_index2)
tm.assert_numpy_array_equal(lidx, lidx2)
tm.assert_numpy_array_equal(ridx, ridx2)
tm.assert_numpy_array_equal(join_index2.values, exp_values)
def test_join_level_corner_case(idx):
# some corner cases
index = Index(["three", "one", "two"])
result = index.join(idx, level="second")
assert isinstance(result, MultiIndex)
with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
idx.join(idx, level=1)
def test_join_self(idx, join_type):
result = idx.join(idx, how=join_type)
expected = idx
if join_type == "outer":
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
def test_join_multi():
# GH 10665
midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
idx = Index([1, 2, 5], name="b")
# inner
jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
exp_idx = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
tm.assert_index_equal(jidx, exp_idx)
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
# flip
jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
tm.assert_index_equal(jidx, exp_idx)
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
# keep MultiIndex
jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
exp_ridx = np.array(
[-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
)
tm.assert_index_equal(jidx, midx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
# flip
jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
tm.assert_index_equal(jidx, midx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
def test_join_multi_wrong_order():
# GH 25760
# GH 28956
midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"])
join_idx, lidx, ridx = midx1.join(midx2, return_indexers=True)
exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp)
tm.assert_index_equal(midx1, join_idx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
def test_join_multi_return_indexers():
# GH 34074
midx1 = MultiIndex.from_product([[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"])
midx2 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
result = midx1.join(midx2, return_indexers=False)
tm.assert_index_equal(result, midx1)
def test_join_overlapping_interval_level():
# GH 44096
idx_1 = MultiIndex.from_tuples(
[
(1, Interval(0.0, 1.0)),
(1, Interval(1.0, 2.0)),
(1, Interval(2.0, 5.0)),
(2, Interval(0.0, 1.0)),
(2, Interval(1.0, 3.0)), # interval limit is here at 3.0, not at 2.0
(2, Interval(3.0, 5.0)),
],
names=["num", "interval"],
)
idx_2 = MultiIndex.from_tuples(
[
(1, Interval(2.0, 5.0)),
(1, Interval(0.0, 1.0)),
(1, Interval(1.0, 2.0)),
(2, Interval(3.0, 5.0)),
(2, Interval(0.0, 1.0)),
(2, Interval(1.0, 3.0)),
],
names=["num", "interval"],
)
expected = MultiIndex.from_tuples(
[
(1, Interval(0.0, 1.0)),
(1, Interval(1.0, 2.0)),
(1, Interval(2.0, 5.0)),
(2, Interval(0.0, 1.0)),
(2, Interval(1.0, 3.0)),
(2, Interval(3.0, 5.0)),
],
names=["num", "interval"],
)
result = idx_1.join(idx_2, how="outer")
tm.assert_index_equal(result, expected)
def test_join_midx_ea():
# GH#49277
midx = MultiIndex.from_arrays(
[Series([1, 1, 3], dtype="Int64"), Series([1, 2, 3], dtype="Int64")],
names=["a", "b"],
)
midx2 = MultiIndex.from_arrays(
[Series([1], dtype="Int64"), Series([3], dtype="Int64")], names=["a", "c"]
)
result = midx.join(midx2, how="inner")
expected = MultiIndex.from_arrays(
[
Series([1, 1], dtype="Int64"),
Series([1, 2], dtype="Int64"),
Series([3, 3], dtype="Int64"),
],
names=["a", "b", "c"],
)
tm.assert_index_equal(result, expected)
def test_join_midx_string():
# GH#49277
midx = MultiIndex.from_arrays(
[
Series(["a", "a", "c"], dtype=StringDtype()),
Series(["a", "b", "c"], dtype=StringDtype()),
],
names=["a", "b"],
)
midx2 = MultiIndex.from_arrays(
[Series(["a"], dtype=StringDtype()), Series(["c"], dtype=StringDtype())],
names=["a", "c"],
)
result = midx.join(midx2, how="inner")
expected = MultiIndex.from_arrays(
[
Series(["a", "a"], dtype=StringDtype()),
Series(["a", "b"], dtype=StringDtype()),
Series(["c", "c"], dtype=StringDtype()),
],
names=["a", "b", "c"],
)
tm.assert_index_equal(result, expected)
def test_join_multi_with_nan():
# GH29252
df1 = DataFrame(
data={"col1": [1.1, 1.2]},
index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]),
)
df2 = DataFrame(
data={"col2": [2.1, 2.2]},
index=MultiIndex.from_product([["A"], [np.nan, 2.0]], names=["id1", "id2"]),
)
result = df1.join(df2)
expected = DataFrame(
data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]},
index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("val", [0, 5])
def test_join_dtypes(any_numeric_ea_dtype, val):
# GH#49830
midx = MultiIndex.from_arrays([Series([1, 2], dtype=any_numeric_ea_dtype), [3, 4]])
midx2 = MultiIndex.from_arrays(
[Series([1, val, val], dtype=any_numeric_ea_dtype), [3, 4, 4]]
)
result = midx.join(midx2, how="outer")
expected = MultiIndex.from_arrays(
[Series([val, val, 1, 2], dtype=any_numeric_ea_dtype), [4, 4, 3, 4]]
).sort_values()
tm.assert_index_equal(result, expected)
def test_join_dtypes_all_nan(any_numeric_ea_dtype):
# GH#49830
midx = MultiIndex.from_arrays(
[Series([1, 2], dtype=any_numeric_ea_dtype), [np.nan, np.nan]]
)
midx2 = MultiIndex.from_arrays(
[Series([1, 0, 0], dtype=any_numeric_ea_dtype), [np.nan, np.nan, np.nan]]
)
result = midx.join(midx2, how="outer")
expected = MultiIndex.from_arrays(
[
Series([0, 0, 1, 2], dtype=any_numeric_ea_dtype),
[np.nan, np.nan, np.nan, np.nan],
]
)
tm.assert_index_equal(result, expected)
def test_join_index_levels():
# GH#53093
midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")])
midx2 = MultiIndex.from_tuples([("a", "2019-01-31")])
result = midx.join(midx2, how="outer")
expected = MultiIndex.from_tuples(
[("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")]
)
tm.assert_index_equal(result.levels[1], expected.levels[1])
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,46 @@
from pandas import MultiIndex
class TestIsLexsorted:
def test_is_lexsorted(self):
levels = [[0, 1], [0, 1, 2]]
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
)
assert index._is_lexsorted()
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
)
assert not index._is_lexsorted()
index = MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
)
assert not index._is_lexsorted()
assert index._lexsort_depth == 0
class TestLexsortDepth:
def test_lexsort_depth(self):
# Test that lexsort_depth return the correct sortorder
# when it was given to the MultiIndex const.
# GH#28518
levels = [[0, 1], [0, 1, 2]]
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
)
assert index._lexsort_depth == 2
index = MultiIndex(
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
)
assert index._lexsort_depth == 1
index = MultiIndex(
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
)
assert index._lexsort_depth == 0

View File

@ -0,0 +1,111 @@
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm
def test_fillna(idx):
# GH 11343
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.fillna(idx[0])
def test_dropna():
# GH 6194
idx = MultiIndex.from_arrays(
[
[1, np.nan, 3, np.nan, 5],
[1, 2, np.nan, np.nan, 5],
["a", "b", "c", np.nan, "e"],
]
)
exp = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
tm.assert_index_equal(idx.dropna(), exp)
tm.assert_index_equal(idx.dropna(how="any"), exp)
exp = MultiIndex.from_arrays(
[[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
)
tm.assert_index_equal(idx.dropna(how="all"), exp)
msg = "invalid how option: xxx"
with pytest.raises(ValueError, match=msg):
idx.dropna(how="xxx")
# GH26408
# test if missing values are dropped for multiindex constructed
# from codes and values
idx = MultiIndex(
levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
)
expected = MultiIndex.from_arrays([["128", 2], ["128", 2]])
tm.assert_index_equal(idx.dropna(), expected)
tm.assert_index_equal(idx.dropna(how="any"), expected)
expected = MultiIndex.from_arrays(
[[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
)
tm.assert_index_equal(idx.dropna(how="all"), expected)
def test_nulls(idx):
# this is really a smoke test for the methods
# as these are adequately tested for function elsewhere
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.isna()
@pytest.mark.xfail(reason="isna is not defined for MultiIndex")
def test_hasnans_isnans(idx):
# GH 11343, added tests for hasnans / isnans
index = idx.copy()
# cases in indices doesn't include NaN
expected = np.array([False] * len(index), dtype=bool)
tm.assert_numpy_array_equal(index._isnan, expected)
assert index.hasnans is False
index = idx.copy()
values = index.values
values[1] = np.nan
index = type(idx)(values)
expected = np.array([False] * len(index), dtype=bool)
expected[1] = True
tm.assert_numpy_array_equal(index._isnan, expected)
assert index.hasnans is True
def test_nan_stays_float():
# GH 7031
idx0 = MultiIndex(levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1])
idx1 = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])
idxm = idx0.join(idx1, how="outer")
assert pd.isna(idx0.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(idxm.get_level_values(1)[:-1]).all()
df0 = pd.DataFrame([[1, 2]], index=idx0)
df1 = pd.DataFrame([[3, 4]], index=idx1)
dfm = df0 - df1
assert pd.isna(df0.index.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
def test_tuples_have_na():
index = MultiIndex(
levels=[[1, 0], [0, 1, 2, 3]],
codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
)
assert pd.isna(index[4][0])
assert pd.isna(index.values[4][0])

View File

@ -0,0 +1,188 @@
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
)
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
# string ordering
mi = lexsorted_two_level_string_multiindex
assert mi.is_monotonic_increasing is False
assert Index(mi.values).is_monotonic_increasing is False
assert mi._is_strictly_monotonic_increasing is False
assert Index(mi.values)._is_strictly_monotonic_increasing is False
def test_is_monotonic_increasing():
i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
assert i.is_monotonic_increasing is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values).is_monotonic_increasing is True
assert i._is_strictly_monotonic_increasing is True
i = MultiIndex.from_product(
[np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
)
assert i.is_monotonic_increasing is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic_increasing is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex.from_product(
[np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
)
assert i.is_monotonic_increasing is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic_increasing is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
assert i.is_monotonic_increasing is False
assert i._is_strictly_monotonic_increasing is False
assert Index(i.values).is_monotonic_increasing is False
assert Index(i.values)._is_strictly_monotonic_increasing is False
i = MultiIndex(
levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
assert i.is_monotonic_increasing is True
assert Index(i.values).is_monotonic_increasing is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values)._is_strictly_monotonic_increasing is True
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[
[1, 2, 3, 4],
[
"gb00b03mlx29",
"lu0197800237",
"nl0000289783",
"nl0000289965",
"nl0000301109",
],
],
codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=["household_id", "asset_id"],
)
assert i.is_monotonic_increasing is False
assert i._is_strictly_monotonic_increasing is False
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic_increasing is True
assert Index(i.values).is_monotonic_increasing is True
assert i._is_strictly_monotonic_increasing is True
assert Index(i.values)._is_strictly_monotonic_increasing is True
def test_is_monotonic_decreasing():
i = MultiIndex.from_product(
[np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
)
assert i.is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
assert Index(i.values).is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
i = MultiIndex.from_product(
[np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
)
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
i = MultiIndex.from_product(
[np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
)
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
# string ordering
i = MultiIndex(
levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
assert i.is_monotonic_decreasing is False
assert Index(i.values).is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
assert Index(i.values)._is_strictly_monotonic_decreasing is False
i = MultiIndex(
levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
assert i.is_monotonic_decreasing is True
assert Index(i.values).is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
assert Index(i.values)._is_strictly_monotonic_decreasing is True
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[
[4, 3, 2, 1],
[
"nl0000301109",
"nl0000289965",
"nl0000289783",
"lu0197800237",
"gb00b03mlx29",
],
],
codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=["household_id", "asset_id"],
)
assert i.is_monotonic_decreasing is False
assert i._is_strictly_monotonic_decreasing is False
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic_decreasing is True
assert Index(i.values).is_monotonic_decreasing is True
assert i._is_strictly_monotonic_decreasing is True
assert Index(i.values)._is_strictly_monotonic_decreasing is True
def test_is_strictly_monotonic_increasing():
idx = MultiIndex(
levels=[["bar", "baz"], ["mom", "next"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]]
)
assert idx.is_monotonic_increasing is True
assert idx._is_strictly_monotonic_increasing is False
def test_is_strictly_monotonic_decreasing():
idx = MultiIndex(
levels=[["baz", "bar"], ["next", "mom"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]]
)
assert idx.is_monotonic_decreasing is True
assert idx._is_strictly_monotonic_decreasing is False
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
"values",
[[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
# GH: 37220
idx = MultiIndex.from_tuples(values, names=["test"])
assert getattr(idx, attr) is False

View File

@ -0,0 +1,201 @@
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm
def check_level_names(index, names):
assert [level.name for level in index.levels] == list(names)
def test_slice_keep_name():
x = MultiIndex.from_tuples([("a", "b"), (1, 2), ("c", "d")], names=["x", "y"])
assert x[1:].names == x.names
def test_index_name_retained():
# GH9857
result = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]})
result = result.set_index("z")
result.loc[10] = [9, 10]
df_expected = pd.DataFrame(
{"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
)
df_expected = df_expected.set_index("z")
tm.assert_frame_equal(result, df_expected)
def test_changing_names(idx):
assert [level.name for level in idx.levels] == ["first", "second"]
view = idx.view()
copy = idx.copy()
shallow_copy = idx._view()
# changing names should not change level names on object
new_names = [name + "a" for name in idx.names]
idx.names = new_names
check_level_names(idx, ["firsta", "seconda"])
# and not on copies
check_level_names(view, ["first", "second"])
check_level_names(copy, ["first", "second"])
check_level_names(shallow_copy, ["first", "second"])
# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
check_level_names(idx, ["firsta", "seconda"])
def test_take_preserve_name(idx):
taken = idx.take([3, 0, 1])
assert taken.names == idx.names
def test_copy_names():
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = MultiIndex.from_tuples([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
multi_idx1 = multi_idx.copy()
assert multi_idx.equals(multi_idx1)
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx1.names == ["MyName1", "MyName2"]
multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"])
assert multi_idx.equals(multi_idx2)
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx2.names == ["NewName1", "NewName2"]
multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"])
assert multi_idx.equals(multi_idx3)
assert multi_idx.names == ["MyName1", "MyName2"]
assert multi_idx3.names == ["NewName1", "NewName2"]
# gh-35592
with pytest.raises(ValueError, match="Length of new names must be 2, got 1"):
multi_idx.copy(names=["mario"])
with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"):
multi_idx.copy(names=[["mario"], ["luigi"]])
def test_names(idx):
# names are assigned in setup
assert idx.names == ["first", "second"]
level_names = [level.name for level in idx.levels]
assert level_names == idx.names
# setting bad names on existing
index = idx
with pytest.raises(ValueError, match="^Length of names"):
setattr(index, "names", list(index.names) + ["third"])
with pytest.raises(ValueError, match="^Length of names"):
setattr(index, "names", [])
# initializing with bad names (should always be equivalent)
major_axis, minor_axis = idx.levels
major_codes, minor_codes = idx.codes
with pytest.raises(ValueError, match="^Length of names"):
MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=["first"],
)
with pytest.raises(ValueError, match="^Length of names"):
MultiIndex(
levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes],
names=["first", "second", "third"],
)
# names are assigned on index, but not transferred to the levels
index.names = ["a", "b"]
level_names = [level.name for level in index.levels]
assert level_names == ["a", "b"]
def test_duplicate_level_names_access_raises(idx):
# GH19029
idx.names = ["foo", "foo"]
with pytest.raises(ValueError, match="name foo occurs multiple times"):
idx._get_level_number("foo")
def test_get_names_from_levels():
idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])
assert idx.levels[0].name == "a"
assert idx.levels[1].name == "b"
def test_setting_names_from_levels_raises():
idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])
with pytest.raises(RuntimeError, match="set_names"):
idx.levels[0].name = "foo"
with pytest.raises(RuntimeError, match="set_names"):
idx.levels[1].name = "foo"
new = pd.Series(1, index=idx.levels[0])
with pytest.raises(RuntimeError, match="set_names"):
new.index.name = "bar"
assert pd.Index._no_setting_name is False
assert pd.RangeIndex._no_setting_name is False
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
"rename_dict, exp_names",
[
({"x": "z"}, ["z", "y", "z"]),
({"x": "z", "y": "x"}, ["z", "x", "z"]),
({"y": "z"}, ["x", "z", "x"]),
({}, ["x", "y", "x"]),
({"z": "a"}, ["x", "y", "x"]),
({"y": "z", "a": "b"}, ["x", "z", "x"]),
],
)
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names):
# GH#20421
mi = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=["x", "y", "x"])
result = getattr(mi, func)(rename_dict)
expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
"rename_dict, exp_names",
[
({"x": "z"}, ["z", "y"]),
({"x": "z", "y": "x"}, ["z", "x"]),
({"a": "z"}, ["x", "y"]),
({}, ["x", "y"]),
],
)
def test_name_mi_with_dict_like(func, rename_dict, exp_names):
# GH#20421
mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
result = getattr(mi, func)(rename_dict)
expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names)
tm.assert_index_equal(result, expected)
def test_index_name_with_dict_like_raising():
# GH#20421
ix = pd.Index([1, 2])
msg = "Can only pass dict-like as `names` for MultiIndex."
with pytest.raises(TypeError, match=msg):
ix.set_names({"x": "z"})
def test_multiindex_name_and_level_raising():
# GH#20421
mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
with pytest.raises(TypeError, match="Can not pass level for dictlike `names`."):
mi.set_names(names={"x": "z"}, level={"x": "z"})

View File

@ -0,0 +1,148 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
IndexSlice,
MultiIndex,
date_range,
)
import pandas._testing as tm
@pytest.fixture
def df():
# c1
# 2016-01-01 00:00:00 a 0
# b 1
# c 2
# 2016-01-01 12:00:00 a 3
# b 4
# c 5
# 2016-01-02 00:00:00 a 6
# b 7
# c 8
# 2016-01-02 12:00:00 a 9
# b 10
# c 11
# 2016-01-03 00:00:00 a 12
# b 13
# c 14
dr = date_range("2016-01-01", "2016-01-03", freq="12h")
abc = ["a", "b", "c"]
mi = MultiIndex.from_product([dr, abc])
frame = DataFrame({"c1": range(15)}, index=mi)
return frame
def test_partial_string_matching_single_index(df):
# partial string matching on a single index
for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]:
df_swap = df_swap.sort_index()
just_a = df_swap.loc["a"]
result = just_a.loc["2016-01-01"]
expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
expected.index = expected.index.droplevel(1)
tm.assert_frame_equal(result, expected)
def test_get_loc_partial_timestamp_multiindex(df):
mi = df.index
key = ("2016-01-01", "a")
loc = mi.get_loc(key)
expected = np.zeros(len(mi), dtype=bool)
expected[[0, 3]] = True
tm.assert_numpy_array_equal(loc, expected)
key2 = ("2016-01-02", "a")
loc2 = mi.get_loc(key2)
expected2 = np.zeros(len(mi), dtype=bool)
expected2[[6, 9]] = True
tm.assert_numpy_array_equal(loc2, expected2)
key3 = ("2016-01", "a")
loc3 = mi.get_loc(key3)
expected3 = np.zeros(len(mi), dtype=bool)
expected3[mi.get_level_values(1).get_loc("a")] = True
tm.assert_numpy_array_equal(loc3, expected3)
key4 = ("2016", "a")
loc4 = mi.get_loc(key4)
expected4 = expected3
tm.assert_numpy_array_equal(loc4, expected4)
# non-monotonic
taker = np.arange(len(mi), dtype=np.intp)
taker[::2] = taker[::-2]
mi2 = mi.take(taker)
loc5 = mi2.get_loc(key)
expected5 = np.zeros(len(mi2), dtype=bool)
expected5[[3, 14]] = True
tm.assert_numpy_array_equal(loc5, expected5)
def test_partial_string_timestamp_multiindex(df):
# GH10331
df_swap = df.swaplevel(0, 1).sort_index()
SLC = IndexSlice
# indexing with IndexSlice
result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
expected = df
tm.assert_frame_equal(result, expected)
# match on secondary index
result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# partial string match on year only
result = df.loc["2016"]
expected = df
tm.assert_frame_equal(result, expected)
# partial string match on date
result = df.loc["2016-01-01"]
expected = df.iloc[0:6]
tm.assert_frame_equal(result, expected)
# partial string match on date and hour, from middle
result = df.loc["2016-01-02 12"]
# hourly resolution, same as index.levels[0], so we are _not_ slicing on
# that level, so that level gets dropped
expected = df.iloc[9:12].droplevel(0)
tm.assert_frame_equal(result, expected)
# partial string match on secondary index
result = df_swap.loc[SLC[:, "2016-01-02"], :]
expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
tm.assert_frame_equal(result, expected)
# tuple selector with partial string match on date
# "2016-01-01" has daily resolution, so _is_ a slice on the first level.
result = df.loc[("2016-01-01", "a"), :]
expected = df.iloc[[0, 3]]
expected = df.iloc[[0, 3]].droplevel(1)
tm.assert_frame_equal(result, expected)
# Slicing date on first level should break (of course) bc the DTI is the
# second level on df_swap
with pytest.raises(KeyError, match="'2016-01-01'"):
df_swap.loc["2016-01-01"]
def test_partial_string_timestamp_multiindex_str_key_raises(df):
# Even though this syntax works on a single index, this is somewhat
# ambiguous and we don't want to extend this behavior forward to work
# in multi-indexes. This would amount to selecting a scalar from a
# column.
with pytest.raises(KeyError, match="'2016-01-01'"):
df["2016-01-01"]
def test_partial_string_timestamp_multiindex_daily_resolution(df):
# GH12685 (partial string with daily resolution or below)
result = df.loc[IndexSlice["2013-03":"2013-03", :], :]
expected = df.iloc[118:180]
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,10 @@
import pytest
from pandas import MultiIndex
def test_pickle_compat_construction():
# this is testing for pickle compat
# need an object to create with
with pytest.raises(TypeError, match="Must pass both levels and codes"):
MultiIndex()

View File

@ -0,0 +1,174 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_reindex(idx):
result, indexer = idx.reindex(list(idx[:4]))
assert isinstance(result, MultiIndex)
assert result.names == ["first", "second"]
assert [level.name for level in result.levels] == ["first", "second"]
result, indexer = idx.reindex(list(idx))
assert isinstance(result, MultiIndex)
assert indexer is None
assert result.names == ["first", "second"]
assert [level.name for level in result.levels] == ["first", "second"]
def test_reindex_level(idx):
index = Index(["one"])
target, indexer = idx.reindex(index, level="second")
target2, indexer2 = index.reindex(idx, level="second")
exp_index = idx.join(index, level="second", how="right")
exp_index2 = idx.join(index, level="second", how="left")
assert target.equals(exp_index)
exp_indexer = np.array([0, 2, 4])
tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)
assert target2.equals(exp_index2)
exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)
with pytest.raises(TypeError, match="Fill method not supported"):
idx.reindex(idx, method="pad", level="second")
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
# GH6552
idx = idx.copy()
target = idx.copy()
idx.names = target.names = [None, None]
other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])
# list & ndarray cases
assert idx.reindex([])[0].names == [None, None]
assert idx.reindex(np.array([]))[0].names == [None, None]
assert idx.reindex(target.tolist())[0].names == [None, None]
assert idx.reindex(target.values)[0].names == [None, None]
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
assert idx.reindex(other_dtype.values)[0].names == [None, None]
idx.names = ["foo", "bar"]
assert idx.reindex([])[0].names == ["foo", "bar"]
assert idx.reindex(np.array([]))[0].names == ["foo", "bar"]
assert idx.reindex(target.tolist())[0].names == ["foo", "bar"]
assert idx.reindex(target.values)[0].names == ["foo", "bar"]
assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"]
assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"]
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
# GH7774
idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
assert idx.reindex([], level=0)[0].names == ["foo", "bar"]
assert idx.reindex([], level=1)[0].names == ["foo", "bar"]
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
using_infer_string,
):
# GH7774
idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
exp = np.object_ if not using_infer_string else str
assert idx.reindex([], level=1)[0].levels[1].dtype.type == exp
# case with EA levels
cat = pd.Categorical(["foo", "bar"])
dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
mi = MultiIndex.from_product([cat, dti])
assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype
assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype
def test_reindex_base(idx):
expected = np.arange(idx.size, dtype=np.intp)
actual = idx.get_indexer(idx)
tm.assert_numpy_array_equal(expected, actual)
with pytest.raises(ValueError, match="Invalid fill method"):
idx.get_indexer(idx, method="invalid")
def test_reindex_non_unique():
idx = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
a = pd.Series(np.arange(4), index=idx)
new_idx = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
msg = "cannot handle a non-unique multi-index!"
with pytest.raises(ValueError, match=msg):
a.reindex(new_idx)
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
# GH41170
idx = MultiIndex.from_arrays(values)
result, result_indexer = idx.reindex(np.array(["b"]), level=0)
expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
expected_indexer = np.array([], dtype=result_indexer.dtype)
tm.assert_index_equal(result, expected)
tm.assert_numpy_array_equal(result_indexer, expected_indexer)
def test_reindex_not_all_tuples():
keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"]
mi = MultiIndex.from_tuples(keys[:-1])
idx = Index(keys)
res, indexer = mi.reindex(idx)
tm.assert_index_equal(res, idx)
expected = np.array([0, 1, 2, -1], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected)
def test_reindex_limit_arg_with_multiindex():
# GH21247
idx = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
df = pd.Series([0.02, 0.01, 0.012], index=idx)
new_idx = MultiIndex.from_tuples(
[
(3, "A"),
(3, "B"),
(4, "A"),
(4, "B"),
(4, "C"),
(5, "B"),
(5, "C"),
(6, "B"),
(6, "C"),
]
)
with pytest.raises(
ValueError,
match="limit argument only valid if doing pad, backfill or nearest reindexing",
):
df.reindex(new_idx, fill_value=0, limit=1)
def test_reindex_with_none_in_nested_multiindex():
# GH42883
index = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)])
index2 = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)])
df1_dtype = pd.DataFrame([1, 2], index=index)
df2_dtype = pd.DataFrame([2, 1], index=index2)
result = df1_dtype.reindex_like(df2_dtype)
expected = df2_dtype
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,224 @@
from datetime import datetime
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_insert(idx):
# key contained in all levels
new_index = idx.insert(0, ("bar", "two"))
assert new_index.equal_levels(idx)
assert new_index[0] == ("bar", "two")
# key not contained in all levels
new_index = idx.insert(0, ("abc", "three"))
exp0 = Index(list(idx.levels[0]) + ["abc"], name="first")
tm.assert_index_equal(new_index.levels[0], exp0)
assert new_index.names == ["first", "second"]
exp1 = Index(list(idx.levels[1]) + ["three"], name="second")
tm.assert_index_equal(new_index.levels[1], exp1)
assert new_index[0] == ("abc", "three")
# key wrong length
msg = "Item must have length equal to number of levels"
with pytest.raises(ValueError, match=msg):
idx.insert(0, ("foo2",))
left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"])
left.set_index(["1st", "2nd"], inplace=True)
ts = left["3rd"].copy(deep=True)
left.loc[("b", "x"), "3rd"] = 2
left.loc[("b", "a"), "3rd"] = -1
left.loc[("b", "b"), "3rd"] = 3
left.loc[("a", "x"), "3rd"] = 4
left.loc[("a", "w"), "3rd"] = 5
left.loc[("a", "a"), "3rd"] = 6
ts.loc[("b", "x")] = 2
ts.loc["b", "a"] = -1
ts.loc[("b", "b")] = 3
ts.loc["a", "x"] = 4
ts.loc[("a", "w")] = 5
ts.loc["a", "a"] = 6
right = pd.DataFrame(
[
["a", "b", 0],
["b", "d", 1],
["b", "x", 2],
["b", "a", -1],
["b", "b", 3],
["a", "x", 4],
["a", "w", 5],
["a", "a", 6],
],
columns=["1st", "2nd", "3rd"],
)
right.set_index(["1st", "2nd"], inplace=True)
# FIXME data types changes to float because
# of intermediate nan insertion;
tm.assert_frame_equal(left, right, check_dtype=False)
tm.assert_series_equal(ts, right["3rd"])
def test_insert2():
# GH9250
idx = (
[("test1", i) for i in range(5)]
+ [("test2", i) for i in range(6)]
+ [("test", 17), ("test", 18)]
)
left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(idx[:-2]))
left.loc[("test", 17)] = 11
left.loc[("test", 18)] = 12
right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(idx))
tm.assert_series_equal(left, right)
def test_append(idx):
result = idx[:3].append(idx[3:])
assert result.equals(idx)
foos = [idx[:1], idx[1:3], idx[3:]]
result = foos[0].append(foos[1:])
assert result.equals(idx)
# empty
result = idx.append([])
assert result.equals(idx)
def test_append_index():
idx1 = Index([1.1, 1.2, 1.3])
idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
idx3 = Index(["A", "B", "C"])
midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])
result = idx1.append(midx_lv2)
# see gh-7112
tz = pytz.timezone("Asia/Tokyo")
expected_tuples = [
(1.1, tz.localize(datetime(2011, 1, 1))),
(1.2, tz.localize(datetime(2011, 1, 2))),
(1.3, tz.localize(datetime(2011, 1, 3))),
]
expected = Index([1.1, 1.2, 1.3] + expected_tuples)
tm.assert_index_equal(result, expected)
result = midx_lv2.append(idx1)
expected = Index(expected_tuples + [1.1, 1.2, 1.3])
tm.assert_index_equal(result, expected)
result = midx_lv2.append(midx_lv2)
expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
tm.assert_index_equal(result, expected)
result = midx_lv2.append(midx_lv3)
tm.assert_index_equal(result, expected)
result = midx_lv3.append(midx_lv2)
expected = Index._simple_new(
np.array(
[
(1.1, tz.localize(datetime(2011, 1, 1)), "A"),
(1.2, tz.localize(datetime(2011, 1, 2)), "B"),
(1.3, tz.localize(datetime(2011, 1, 3)), "C"),
]
+ expected_tuples,
dtype=object,
),
None,
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)])
def test_append_names_match(name, exp):
# GH#48288
midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
midx2 = MultiIndex.from_arrays([[3], [5]], names=["a", name])
result = midx.append(midx2)
expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp])
tm.assert_index_equal(result, expected)
def test_append_names_dont_match():
# GH#48288
midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
midx2 = MultiIndex.from_arrays([[3], [5]], names=["x", "y"])
result = midx.append(midx2)
expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None)
tm.assert_index_equal(result, expected)
def test_append_overlapping_interval_levels():
# GH 54934
ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0])
ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5])
mi1 = MultiIndex.from_product([ivl1, ivl1])
mi2 = MultiIndex.from_product([ivl2, ivl2])
result = mi1.append(mi2)
expected = MultiIndex.from_tuples(
[
(pd.Interval(0.0, 1.0), pd.Interval(0.0, 1.0)),
(pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)),
(pd.Interval(1.0, 2.0), pd.Interval(0.0, 1.0)),
(pd.Interval(1.0, 2.0), pd.Interval(1.0, 2.0)),
(pd.Interval(0.5, 1.5), pd.Interval(0.5, 1.5)),
(pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)),
(pd.Interval(1.5, 2.5), pd.Interval(0.5, 1.5)),
(pd.Interval(1.5, 2.5), pd.Interval(1.5, 2.5)),
]
)
tm.assert_index_equal(result, expected)
def test_repeat():
reps = 2
numbers = [1, 2, 3]
names = np.array(["foo", "bar"])
m = MultiIndex.from_product([numbers, names], names=names)
expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(m.repeat(reps), expected)
def test_insert_base(idx):
result = idx[1:4]
# test 0th element
assert idx[0:4].equals(result.insert(0, idx[0]))
def test_delete_base(idx):
expected = idx[1:]
result = idx.delete(0)
assert result.equals(expected)
assert result.name == expected.name
expected = idx[:-1]
result = idx.delete(-1)
assert result.equals(expected)
assert result.name == expected.name
msg = "index 6 is out of bounds for axis 0 with size 6"
with pytest.raises(IndexError, match=msg):
idx.delete(len(idx))

View File

@ -0,0 +1,772 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalIndex,
DataFrame,
Index,
IntervalIndex,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.api.types import (
is_float_dtype,
is_unsigned_integer_dtype,
)
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize(
"method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
# non-iterable input
msg = "Input must be Index or array-like"
with pytest.raises(TypeError, match=msg):
getattr(idx, method)(case, sort=sort)
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_intersection_base(idx, sort, klass):
first = idx[2::-1] # first 3 elements reversed
second = idx[:5]
if klass is not MultiIndex:
second = klass(second.values)
intersect = first.intersection(second, sort=sort)
if sort is None:
expected = first.sort_values()
else:
expected = first
tm.assert_index_equal(intersect, expected)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.intersection([1, 2, 3], sort=sort)
@pytest.mark.arm_slow
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_union_base(idx, sort, klass):
first = idx[::-1]
second = idx[:5]
if klass is not MultiIndex:
second = klass(second.values)
union = first.union(second, sort=sort)
if sort is None:
expected = first.sort_values()
else:
expected = first
tm.assert_index_equal(union, expected)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.union([1, 2, 3], sort=sort)
def test_difference_base(idx, sort):
second = idx[4:]
answer = idx[:4]
result = idx.difference(second, sort=sort)
if sort is None:
answer = answer.sort_values()
assert result.equals(answer)
tm.assert_index_equal(result, answer)
# GH 10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
result = idx.difference(case, sort=sort)
tm.assert_index_equal(result, answer)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
idx.difference([1, 2, 3], sort=sort)
def test_symmetric_difference(idx, sort):
first = idx[1:]
second = idx[:-1]
answer = idx[[-1, 0]]
result = first.symmetric_difference(second, sort=sort)
if sort is None:
answer = answer.sort_values()
tm.assert_index_equal(result, answer)
# GH 10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
result = first.symmetric_difference(case, sort=sort)
tm.assert_index_equal(result, answer)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.symmetric_difference([1, 2, 3], sort=sort)
def test_multiindex_symmetric_difference():
# GH 13490
idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])
result = idx.symmetric_difference(idx)
assert result.names == idx.names
idx2 = idx.copy().rename(["A", "B"])
result = idx.symmetric_difference(idx2)
assert result.names == [None, None]
def test_empty(idx):
# GH 15270
assert not idx.empty
assert idx[:0].empty
def test_difference(idx, sort):
first = idx
result = first.difference(idx[-3:], sort=sort)
vals = idx[:-3].values
if sort is None:
vals = sorted(vals)
expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)
assert isinstance(result, MultiIndex)
assert result.equals(expected)
assert result.names == idx.names
tm.assert_index_equal(result, expected)
# empty difference: reflexive
result = idx.difference(idx, sort=sort)
expected = idx[:0]
assert result.equals(expected)
assert result.names == idx.names
# empty difference: superset
result = idx[-3:].difference(idx, sort=sort)
expected = idx[:0]
assert result.equals(expected)
assert result.names == idx.names
# empty difference: degenerate
result = idx[:0].difference(idx, sort=sort)
expected = idx[:0]
assert result.equals(expected)
assert result.names == idx.names
# names not the same
chunklet = idx[-3:]
chunklet.names = ["foo", "baz"]
result = first.difference(chunklet, sort=sort)
assert result.names == (None, None)
# empty, but non-equal
result = idx.difference(idx.sortlevel(1)[0], sort=sort)
assert len(result) == 0
# raise Exception called with non-MultiIndex
result = first.difference(first.values, sort=sort)
assert result.equals(first[:0])
# name from empty array
result = first.difference([], sort=sort)
assert first.equals(result)
assert first.names == result.names
# name from non-empty array
result = first.difference([("foo", "one")], sort=sort)
expected = MultiIndex.from_tuples(
[("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
)
expected.names = first.names
assert first.names == result.names
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
first.difference([1, 2, 3, 4, 5], sort=sort)
def test_difference_sort_special():
# GH-24959
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
# sort=None, the default
result = idx.difference([])
tm.assert_index_equal(result, idx)
def test_difference_sort_special_true():
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
result = idx.difference([], sort=True)
expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
tm.assert_index_equal(result, expected)
def test_difference_sort_incomparable():
# GH-24959
idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])
# sort=None, the default
msg = "sort order is undefined for incomparable objects"
with tm.assert_produces_warning(RuntimeWarning, match=msg):
result = idx.difference(other)
tm.assert_index_equal(result, idx)
# sort=False
result = idx.difference(other, sort=False)
tm.assert_index_equal(result, idx)
def test_difference_sort_incomparable_true():
idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]])
other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])
# TODO: this is raising in constructing a Categorical when calling
# algos.safe_sort. Should we catch and re-raise with a better message?
msg = "'values' is not ordered, please explicitly specify the categories order "
with pytest.raises(TypeError, match=msg):
idx.difference(other, sort=True)
def test_union(idx, sort):
piece1 = idx[:5][::-1]
piece2 = idx[3:]
the_union = piece1.union(piece2, sort=sort)
if sort in (None, False):
tm.assert_index_equal(the_union.sort_values(), idx.sort_values())
else:
tm.assert_index_equal(the_union, idx)
# corner case, pass self or empty thing:
the_union = idx.union(idx, sort=sort)
tm.assert_index_equal(the_union, idx)
the_union = idx.union(idx[:0], sort=sort)
tm.assert_index_equal(the_union, idx)
tuples = idx.values
result = idx[:4].union(tuples[4:], sort=sort)
if sort is None:
tm.assert_index_equal(result.sort_values(), idx.sort_values())
else:
assert result.equals(idx)
def test_union_with_regular_index(idx, using_infer_string):
other = Index(["A", "B", "C"])
result = other.union(idx)
assert ("foo", "one") in result
assert "B" in result
if using_infer_string:
with pytest.raises(NotImplementedError, match="Can only union"):
idx.union(other)
else:
msg = "The values in the array are unorderable"
with tm.assert_produces_warning(RuntimeWarning, match=msg):
result2 = idx.union(other)
# This is more consistent now, if sorting fails then we don't sort at all
# in the MultiIndex case.
assert not result.equals(result2)
def test_intersection(idx, sort):
piece1 = idx[:5][::-1]
piece2 = idx[3:]
the_int = piece1.intersection(piece2, sort=sort)
if sort in (None, True):
tm.assert_index_equal(the_int, idx[3:5])
else:
tm.assert_index_equal(the_int.sort_values(), idx[3:5])
# corner case, pass self
the_int = idx.intersection(idx, sort=sort)
tm.assert_index_equal(the_int, idx)
# empty intersection: disjoint
empty = idx[:2].intersection(idx[2:], sort=sort)
expected = idx[:0]
assert empty.equals(expected)
tuples = idx.values
result = idx.intersection(tuples)
assert result.equals(idx)
@pytest.mark.parametrize(
"method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(idx, sort, method):
other = idx.to_flat_index().astype("category")
res_names = [None] * idx.nlevels
result = getattr(idx, method)(other, sort=sort)
expected = getattr(idx, method)(idx, sort=sort).rename(res_names)
tm.assert_index_equal(result, expected)
result = getattr(idx, method)(other[:5], sort=sort)
expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names)
tm.assert_index_equal(result, expected)
def test_intersection_non_object(idx, sort):
other = Index(range(3), name="foo")
result = idx.intersection(other, sort=sort)
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None)
tm.assert_index_equal(result, expected, exact=True)
# if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
result = idx.intersection(np.asarray(other)[:0], sort=sort)
expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names)
tm.assert_index_equal(result, expected, exact=True)
msg = "other must be a MultiIndex or a list of tuples"
with pytest.raises(TypeError, match=msg):
# With non-zero length non-index, we try and fail to convert to tuples
idx.intersection(np.asarray(other), sort=sort)
def test_intersect_equal_sort():
# GH-24959
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
def test_intersect_equal_sort_true():
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
result = idx.intersection(idx, sort=True)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
# https://github.com/pandas-dev/pandas/issues/24959
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
# default, sort=None
other = idx[slice_]
tm.assert_index_equal(idx.union(other), idx)
tm.assert_index_equal(other.union(idx), idx)
# sort=False
tm.assert_index_equal(idx.union(other, sort=False), idx)
def test_union_sort_other_empty_sort():
idx = MultiIndex.from_product([[1, 0], ["a", "b"]])
other = idx[:0]
result = idx.union(other, sort=True)
expected = MultiIndex.from_product([[0, 1], ["a", "b"]])
tm.assert_index_equal(result, expected)
def test_union_sort_other_incomparable():
# https://github.com/pandas-dev/pandas/issues/24959
idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
# default, sort=None
with tm.assert_produces_warning(RuntimeWarning):
result = idx.union(idx[:1])
tm.assert_index_equal(result, idx)
# sort=False
result = idx.union(idx[:1], sort=False)
tm.assert_index_equal(result, idx)
def test_union_sort_other_incomparable_sort():
idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
msg = "'<' not supported between instances of 'Timestamp' and 'int'"
with pytest.raises(TypeError, match=msg):
idx.union(idx[:1], sort=True)
def test_union_non_object_dtype_raises():
# GH#32646 raise NotImplementedError instead of less-informative error
mi = MultiIndex.from_product([["a", "b"], [1, 2]])
idx = mi.levels[1]
msg = "Can only union MultiIndex with MultiIndex or Index of tuples"
with pytest.raises(NotImplementedError, match=msg):
mi.union(idx)
def test_union_empty_self_different_names():
# GH#38423
mi = MultiIndex.from_arrays([[]])
mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
result = mi.union(mi2)
expected = MultiIndex.from_arrays([[1, 2], [3, 4]])
tm.assert_index_equal(result, expected)
def test_union_multiindex_empty_rangeindex():
# GH#41234
mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
ri = pd.RangeIndex(0)
result_left = mi.union(ri)
tm.assert_index_equal(mi, result_left, check_names=False)
result_right = ri.union(mi)
tm.assert_index_equal(mi, result_right, check_names=False)
@pytest.mark.parametrize(
"method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_sort_validation(method):
idx1 = MultiIndex.from_product([["a", "b"], [1, 2]])
idx2 = MultiIndex.from_product([["b", "c"], [1, 2]])
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
getattr(idx1, method)(idx2, sort=2)
# sort=True is supported as of GH#?
getattr(idx1, method)(idx2, sort=True)
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
# GH#48606
midx = MultiIndex.from_arrays(
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
)
midx2 = MultiIndex.from_arrays(
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
)
result = midx.difference(midx2)
expected = MultiIndex.from_arrays([Series([1], dtype=any_numeric_ea_dtype), [2]])
tm.assert_index_equal(result, expected)
result = midx.difference(midx.sort_values(ascending=False))
expected = MultiIndex.from_arrays(
[Series([], dtype=any_numeric_ea_dtype), Series([], dtype=np.int64)],
names=["a", None],
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("val", [pd.NA, 5])
def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
# GH#48607
midx = MultiIndex.from_arrays(
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
)
midx2 = MultiIndex.from_arrays(
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
)
result = midx.symmetric_difference(midx2)
expected = MultiIndex.from_arrays(
[Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]]
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
("tuples", "exp_tuples"),
[
([("val1", "test1")], [("val1", "test1")]),
([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
(
[("val2", "test2"), ("val1", "test1")],
[("val2", "test2"), ("val1", "test1")],
),
],
)
def test_intersect_with_duplicates(tuples, exp_tuples):
# GH#36915
left = MultiIndex.from_tuples(tuples, names=["first", "second"])
right = MultiIndex.from_tuples(
[("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
names=["first", "second"],
)
result = left.intersection(right)
expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"data, names, expected",
[
((1,), None, [None, None]),
((1,), ["a"], [None, None]),
((1,), ["b"], [None, None]),
((1, 2), ["c", "d"], [None, None]),
((1, 2), ["b", "a"], [None, None]),
((1, 2, 3), ["a", "b", "c"], [None, None]),
((1, 2), ["a", "c"], ["a", None]),
((1, 2), ["c", "b"], [None, "b"]),
((1, 2), ["a", "b"], ["a", "b"]),
((1, 2), [None, "b"], [None, "b"]),
],
)
def test_maybe_match_names(data, names, expected):
# GH#38323
mi = MultiIndex.from_tuples([], names=["a", "b"])
mi2 = MultiIndex.from_tuples([data], names=names)
result = mi._maybe_match_names(mi2)
assert result == expected
def test_intersection_equal_different_names():
# GH#30302
mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["c", "b"])
mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
result = mi1.intersection(mi2)
expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"])
tm.assert_index_equal(result, expected)
def test_intersection_different_names():
# GH#38323
mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"])
mi2 = MultiIndex.from_arrays([[1], [3]])
result = mi.intersection(mi2)
tm.assert_index_equal(result, mi2)
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
# GH#38623
mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]])
mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]])
result = mi1.intersection(mi2)
expected = MultiIndex.from_arrays([[3, nulls_fixture], [1, 2]])
tm.assert_index_equal(result, expected)
def test_union_with_missing_values_on_both_sides(nulls_fixture):
# GH#38623
mi1 = MultiIndex.from_arrays([[1, nulls_fixture]])
mi2 = MultiIndex.from_arrays([[1, nulls_fixture, 3]])
result = mi1.union(mi2)
expected = MultiIndex.from_arrays([[1, 3, nulls_fixture]])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
@pytest.mark.parametrize("sort", [None, False])
def test_union_nan_got_duplicated(dtype, sort):
# GH#38977, GH#49010
mi1 = MultiIndex.from_arrays([pd.array([1.0, np.nan], dtype=dtype), [2, 3]])
mi2 = MultiIndex.from_arrays([pd.array([1.0, np.nan, 3.0], dtype=dtype), [2, 3, 4]])
result = mi1.union(mi2, sort=sort)
if sort is None:
expected = MultiIndex.from_arrays(
[pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]]
)
else:
expected = mi2
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("val", [4, 1])
def test_union_keep_ea_dtype(any_numeric_ea_dtype, val):
# GH#48505
arr1 = Series([val, 2], dtype=any_numeric_ea_dtype)
arr2 = Series([2, 1], dtype=any_numeric_ea_dtype)
midx = MultiIndex.from_arrays([arr1, [1, 2]], names=["a", None])
midx2 = MultiIndex.from_arrays([arr2, [2, 1]])
result = midx.union(midx2)
if val == 4:
expected = MultiIndex.from_arrays(
[Series([1, 2, 4], dtype=any_numeric_ea_dtype), [1, 2, 1]]
)
else:
expected = MultiIndex.from_arrays(
[Series([1, 2], dtype=any_numeric_ea_dtype), [1, 2]]
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("dupe_val", [3, pd.NA])
def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype):
# GH48900
mi1 = MultiIndex.from_arrays(
[
Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype),
Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype),
]
)
mi2 = MultiIndex.from_arrays(
[
Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
]
)
result = mi1.union(mi2)
expected = MultiIndex.from_arrays(
[
Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
]
)
tm.assert_index_equal(result, expected)
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_duplicates(index, request):
# GH#38977
if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
pytest.skip(f"No duplicates in an empty {type(index).__name__}")
values = index.unique().values.tolist()
mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
result = mi2.union(mi1)
expected = mi2.sort_values()
tm.assert_index_equal(result, expected)
if (
is_unsigned_integer_dtype(mi2.levels[0])
and (mi2.get_level_values(0) < 2**63).all()
):
# GH#47294 - union uses lib.fast_zip, converting data to Python integers
# and loses type information. Result is then unsigned only when values are
# sufficiently large to require unsigned dtype. This happens only if other
# has dups or one of both have missing values
expected = expected.set_levels(
[expected.levels[0].astype(np.int64), expected.levels[1]]
)
elif is_float_dtype(mi2.levels[0]):
# mi2 has duplicates witch is a different path than above, Fix that path
# to use correct float dtype?
expected = expected.set_levels(
[expected.levels[0].astype(float), expected.levels[1]]
)
result = mi1.union(mi2)
tm.assert_index_equal(result, expected)
def test_union_keep_dtype_precision(any_real_numeric_dtype):
# GH#48498
arr1 = Series([4, 1, 1], dtype=any_real_numeric_dtype)
arr2 = Series([1, 4], dtype=any_real_numeric_dtype)
midx = MultiIndex.from_arrays([arr1, [2, 1, 1]], names=["a", None])
midx2 = MultiIndex.from_arrays([arr2, [1, 2]], names=["a", None])
result = midx.union(midx2)
expected = MultiIndex.from_arrays(
([Series([1, 1, 4], dtype=any_real_numeric_dtype), [1, 1, 2]]),
names=["a", None],
)
tm.assert_index_equal(result, expected)
def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype):
# GH#48498
arr1 = Series([4, pd.NA], dtype=any_numeric_ea_dtype)
arr2 = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
midx = MultiIndex.from_arrays([arr1, [2, 1]], names=["a", None])
midx2 = MultiIndex.from_arrays([arr2, [1, 2]])
result = midx.union(midx2)
expected = MultiIndex.from_arrays(
[Series([1, 4, pd.NA, pd.NA], dtype=any_numeric_ea_dtype), [1, 2, 1, 2]]
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"levels1, levels2, codes1, codes2, names",
[
(
[["a", "b", "c"], [0, ""]],
[["c", "d", "b"], [""]],
[[0, 1, 2], [1, 1, 1]],
[[0, 1, 2], [0, 0, 0]],
["name1", "name2"],
),
],
)
def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
# GH#25169
mi1 = MultiIndex(levels=levels1, codes=codes1, names=names)
mi2 = MultiIndex(levels=levels2, codes=codes2, names=names)
mi_int = mi1.intersection(mi2)
assert mi_int._lexsort_depth == 2
@pytest.mark.parametrize(
"a",
[pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]],
)
@pytest.mark.parametrize(
"b",
[
pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True),
pd.Categorical(["a", "b"], categories=["b", "a"]),
],
)
def test_intersection_with_non_lex_sorted_categories(a, b):
# GH#49974
other = ["1", "2"]
df1 = DataFrame({"x": a, "y": other})
df2 = DataFrame({"x": b, "y": other})
expected = MultiIndex.from_arrays([a, other], names=["x", "y"])
res1 = MultiIndex.from_frame(df1).intersection(
MultiIndex.from_frame(df2.sort_values(["x", "y"]))
)
res2 = MultiIndex.from_frame(df1).intersection(MultiIndex.from_frame(df2))
res3 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection(
MultiIndex.from_frame(df2)
)
res4 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection(
MultiIndex.from_frame(df2.sort_values(["x", "y"]))
)
tm.assert_index_equal(res1, expected)
tm.assert_index_equal(res2, expected)
tm.assert_index_equal(res3, expected)
tm.assert_index_equal(res4, expected)
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
# GH#48604
midx = MultiIndex.from_arrays(
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
)
midx2 = MultiIndex.from_arrays(
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
)
result = midx.intersection(midx2)
expected = MultiIndex.from_arrays([Series([2], dtype=any_numeric_ea_dtype), [1]])
tm.assert_index_equal(result, expected)
def test_union_with_na_when_constructing_dataframe():
# GH43222
series1 = Series(
(1,),
index=MultiIndex.from_arrays(
[Series([None], dtype="string"), Series([None], dtype="string")]
),
)
series2 = Series((10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b"))))
result = DataFrame([series1, series2])
expected = DataFrame({(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,349 @@
import numpy as np
import pytest
from pandas.errors import (
PerformanceWarning,
UnsortedIndexError,
)
from pandas import (
CategoricalIndex,
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core.indexes.frozen import FrozenList
def test_sortlevel(idx):
tuples = list(idx)
np.random.default_rng(2).shuffle(tuples)
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_not_sort_remaining():
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
sorted_idx, _ = mi.sortlevel("A", sort_remaining=False)
assert sorted_idx.equals(mi)
def test_sortlevel_deterministic():
tuples = [
("bar", "one"),
("foo", "two"),
("qux", "two"),
("foo", "one"),
("baz", "two"),
("qux", "one"),
]
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_na_position():
# GH#51612
midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)])
result = midx.sortlevel(level=[0, 1], na_position="last")[0]
expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)])
tm.assert_index_equal(result, expected)
def test_numpy_argsort(idx):
result = np.argsort(idx)
expected = idx.argsort()
tm.assert_numpy_array_equal(result, expected)
# these are the only two types that perform
# pandas compatibility input validation - the
# rest already perform separate (or no) such
# validation via their 'values' attribute as
# defined in pandas.core.indexes/base.py - they
# cannot be changed at the moment due to
# backwards compatibility concerns
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, axis=1)
msg = "the 'kind' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, kind="mergesort")
msg = "the 'order' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.argsort(idx, order=("a", "b"))
def test_unsortedindex():
# GH 11897
mi = MultiIndex.from_tuples(
[("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
names=["one", "two"],
)
df = DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"])
# GH 16734: not sorted, but no real slicing
result = df.loc(axis=0)["z", "a"]
expected = df.iloc[0]
tm.assert_series_equal(result, expected)
msg = (
"MultiIndex slicing requires the index to be lexsorted: "
r"slicing on levels \[1\], lexsort depth 0"
)
with pytest.raises(UnsortedIndexError, match=msg):
df.loc(axis=0)["z", slice("a")]
df.sort_index(inplace=True)
assert len(df.loc(axis=0)["z", :]) == 2
with pytest.raises(KeyError, match="'q'"):
df.loc(axis=0)["q", :]
def test_unsortedindex_doc_examples():
# https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
dfm = DataFrame(
{
"jim": [0, 0, 1, 1],
"joe": ["x", "x", "z", "y"],
"jolie": np.random.default_rng(2).random(4),
}
)
dfm = dfm.set_index(["jim", "joe"])
with tm.assert_produces_warning(PerformanceWarning):
dfm.loc[(1, "z")]
msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)"
with pytest.raises(UnsortedIndexError, match=msg):
dfm.loc[(0, "y"):(1, "z")]
assert not dfm.index._is_lexsorted()
assert dfm.index._lexsort_depth == 1
# sort it
dfm = dfm.sort_index()
dfm.loc[(1, "z")]
dfm.loc[(0, "y"):(1, "z")]
assert dfm.index._is_lexsorted()
assert dfm.index._lexsort_depth == 2
def test_reconstruct_sort():
# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
assert mi.is_monotonic_increasing
recons = mi._sort_levels_monotonic()
assert recons.is_monotonic_increasing
assert mi is recons
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex.from_tuples(
[("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
names=["one", "two"],
)
assert not mi.is_monotonic_increasing
recons = mi._sort_levels_monotonic()
assert not recons.is_monotonic_increasing
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex(
levels=[["b", "d", "a"], [1, 2, 3]],
codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
names=["col1", "col2"],
)
assert not mi.is_monotonic_increasing
recons = mi._sort_levels_monotonic()
assert not recons.is_monotonic_increasing
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
def test_reconstruct_remove_unused():
# xref to GH 2770
df = DataFrame(
[["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
columns=["first", "second", "third"],
)
df2 = df.set_index(["first", "second"], drop=False)
df2 = df2[df2["first"] != "deleteMe"]
# removed levels are there
expected = MultiIndex(
levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
codes=[[1, 2], [1, 2]],
names=["first", "second"],
)
result = df2.index
tm.assert_index_equal(result, expected)
expected = MultiIndex(
levels=[["keepMe", "keepMeToo"], [2, 3]],
codes=[[0, 1], [0, 1]],
names=["first", "second"],
)
result = df2.index.remove_unused_levels()
tm.assert_index_equal(result, expected)
# idempotent
result2 = result.remove_unused_levels()
tm.assert_index_equal(result2, expected)
assert result2.is_(result)
@pytest.mark.parametrize(
"first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
# GH16556
# because tests should be deterministic (and this test in particular
# checks that levels are removed, which is not the case for every
# random input):
rng = np.random.default_rng(10) # seed is arbitrary value that works
size = 1 << 16
df = DataFrame(
{
"first": rng.integers(0, 1 << 13, size).astype(first_type),
"second": rng.integers(0, 1 << 10, size).astype(second_type),
"third": rng.random(size),
}
)
df = df.groupby(["first", "second"]).sum()
df = df[df.third < 0.1]
result = df.index.remove_unused_levels()
assert len(result.levels[0]) < len(df.index.levels[0])
assert len(result.levels[1]) < len(df.index.levels[1])
assert result.equals(df.index)
expected = df.reset_index().set_index(["first", "second"]).index
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
"level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
# GH 18417
mi = MultiIndex(levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])
result = mi.remove_unused_levels()
tm.assert_index_equal(result, mi)
for level in 0, 1:
assert "unused" not in result.levels[level]
def test_argsort(idx):
result = idx.argsort()
expected = idx.values.argsort()
tm.assert_numpy_array_equal(result, expected)
def test_remove_unused_levels_with_nan():
# GH 37510
idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"])
idx = idx.set_levels(["a", np.nan], level="id1")
idx = idx.remove_unused_levels()
result = idx.levels
expected = FrozenList([["a", np.nan], [4]])
assert str(result) == str(expected)
def test_sort_values_nan():
# GH48495, GH48626
midx = MultiIndex(levels=[["A", "B", "C"], ["D"]], codes=[[1, 0, 2], [-1, -1, 0]])
result = midx.sort_values()
expected = MultiIndex(
levels=[["A", "B", "C"], ["D"]], codes=[[0, 1, 2], [-1, -1, 0]]
)
tm.assert_index_equal(result, expected)
def test_sort_values_incomparable():
# GH48495
mi = MultiIndex.from_arrays(
[
[1, Timestamp("2000-01-01")],
[3, 4],
]
)
match = "'<' not supported between instances of 'Timestamp' and 'int'"
with pytest.raises(TypeError, match=match):
mi.sort_values()
@pytest.mark.parametrize("na_position", ["first", "last"])
@pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"])
def test_sort_values_with_na_na_position(dtype, na_position):
# 51612
arrays = [
Series([1, 1, 2], dtype=dtype),
Series([1, None, 3], dtype=dtype),
]
index = MultiIndex.from_arrays(arrays)
result = index.sort_values(na_position=na_position)
if na_position == "first":
arrays = [
Series([1, 1, 2], dtype=dtype),
Series([None, 1, 3], dtype=dtype),
]
else:
arrays = [
Series([1, 1, 2], dtype=dtype),
Series([1, None, 3], dtype=dtype),
]
expected = MultiIndex.from_arrays(arrays)
tm.assert_index_equal(result, expected)
def test_sort_unnecessary_warning():
# GH#55386
midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)])
midx = midx.set_levels([2.5, np.nan, 1], level=0)
result = midx.sort_values()
expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)])
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,78 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_take(idx):
indexer = [4, 3, 0, 2]
result = idx.take(indexer)
expected = idx[indexer]
assert result.equals(expected)
# GH 10791
msg = "'MultiIndex' object has no attribute 'freq'"
with pytest.raises(AttributeError, match=msg):
idx.freq
def test_take_invalid_kwargs(idx):
indices = [1, 2]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode="clip")
def test_take_fill_value():
# GH 12631
vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
idx = pd.MultiIndex.from_product(vals, names=["str", "dt"])
result = idx.take(np.array([1, 0, -1]))
exp_vals = [
("A", pd.Timestamp("2011-01-02")),
("A", pd.Timestamp("2011-01-01")),
("B", pd.Timestamp("2011-01-02")),
]
expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
exp_vals = [
("A", pd.Timestamp("2011-01-02")),
("A", pd.Timestamp("2011-01-01")),
(np.nan, pd.NaT),
]
expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
exp_vals = [
("A", pd.Timestamp("2011-01-02")),
("A", pd.Timestamp("2011-01-01")),
("B", pd.Timestamp("2011-01-02")),
]
expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"])
tm.assert_index_equal(result, expected)
msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
msg = "index -5 is out of bounds for( axis 0 with)? size 4"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))