Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
df_pyarrow.dtypes

Copy-on-Write improvements
^^^^^^^^^^^^^^^^^^^^^^^^^^
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a new section, like the one being added in #50471. We can consolidate these separate blurbs into a coherent whatsnew entry later on.


* The :class:`Series` constructor will now create a lazy copy (deferring the copy until
a modification to the data happens) when constructing a Series from an existing
Series with the default of ``copy=False`` (:issue:`50471`)

.. _whatsnew_200.enhancements.other:

Other enhancements
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,17 @@ def __init__(
elif isinstance(data, Series):
if index is None:
index = data.index
if (
get_option("mode.copy_on_write")
and get_option("mode.data_manager") == "block"
):
data = data._mgr.copy(deep=False)
else:
data = data._mgr
else:
data = data.reindex(index, copy=copy)
copy = False
data = data._mgr
data = data._mgr
elif is_dict_like(data):
data, index = self._init_dict(data, index, dtype)
dtype = None
Expand Down
75 changes: 75 additions & 0 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import numpy as np

from pandas import Series

# -----------------------------------------------------------------------------
# Copy/view behaviour for Series / DataFrame constructors


def test_series_from_series(using_copy_on_write):
    # Case: constructing a Series from another Series object follows CoW rules:
    # a new object is returned and thus mutations are not propagated
    #
    # ``using_copy_on_write`` is the boolean fixture selecting which of the two
    # behaviours (CoW enabled / disabled) the assertions below expect.
    ser = Series([1, 2, 3], name="name")

    # default is copy=False -> new Series is a shallow copy / view of original
    result = Series(ser)

    # the shallow copy still shares memory
    assert np.shares_memory(ser.values, result.values)

    if using_copy_on_write:
        # the new manager must carry references for the write below to be
        # able to trigger a copy
        assert result._mgr.refs is not None

    # mutate the new Series; what happens to the parent depends on the mode
    result.iloc[0] = 0

    if using_copy_on_write:
        # mutating new series copy doesn't mutate original
        assert ser.iloc[0] == 1
        # mutating triggered a copy-on-write -> no longer shares memory
        assert not np.shares_memory(ser.values, result.values)
    else:
        # mutating shallow copy does mutate original
        assert ser.iloc[0] == 0
        # and still shares memory
        assert np.shares_memory(ser.values, result.values)

    # the same when modifying the parent
    # Start from a fresh parent: without CoW the write above already set
    # ser.iloc[0] to 0, so re-using ``ser`` would let the non-CoW assertion
    # below pass even if the mutation were not propagated at all.
    ser = Series([1, 2, 3], name="name")
    result = Series(ser)

    ser.iloc[0] = 0

    if using_copy_on_write:
        # mutating original doesn't mutate new series
        assert result.iloc[0] == 1
    else:
        # mutating original does mutate shallow copy
        assert result.iloc[0] == 0


def test_series_from_series_with_reindex(using_copy_on_write):
    # Case: constructing a Series from another Series with specifying an index
    # that potentially requires a reindex of the values
    #
    # ``using_copy_on_write`` is the boolean fixture selecting which of the two
    # behaviours (CoW enabled / disabled) the assertions below expect.
    ser = Series([1, 2, 3], name="name")

    # passing an index that doesn't actually require a reindex of the values
    # -> without CoW we get an actual mutating view
    equivalent_indexes = [
        ser.index,
        ser.index.copy(),
        list(ser.index),
        ser.index.rename("idx"),
    ]

    # Write a different value on every iteration: without CoW the parent keeps
    # whatever the previous iteration wrote, so writing one fixed constant
    # would let the "parent was mutated" assertion pass from the second
    # iteration on even if nothing was propagated.
    for new_value, index in enumerate(equivalent_indexes, start=10):
        result = Series(ser, index=index)
        assert np.shares_memory(ser.values, result.values)
        result.iloc[0] = new_value
        if using_copy_on_write:
            # the parent keeps its original value
            assert ser.iloc[0] == 1
        else:
            # the write went through to the parent
            assert ser.iloc[0] == new_value

    # ensure that if an actual reindex is needed, we don't have any refs
    # (mutating the result wouldn't trigger CoW)
    result = Series(ser, index=[0, 1, 2, 3])
    assert not np.shares_memory(ser.values, result.values)
    if using_copy_on_write:
        assert result._mgr.refs is None or result._mgr.refs[0] is None
3 changes: 0 additions & 3 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,3 @@ def test_dataframe_add_column_from_series():
df.loc[2, "new"] = 100
expected_s = Series([0, 11, 12])
tm.assert_series_equal(s, expected_s)


# TODO add tests for constructors