Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ pandas 0.11.1
- ``DataFrame.interpolate()`` is now deprecated. Please use
``DataFrame.fillna()`` and ``DataFrame.replace()`` instead (GH3582_,
GH3675_, GH3676_).
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
deprecated
- The ``infer_types`` parameter of ``DataFrame.replace`` has been removed;
``replace`` now performs type conversion by default (GH3907_)
- Deprecated ``display.height``. ``display.width`` is now only a formatting option
and does not control triggering of summary, similar to < 0.11.0.
- Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
Expand All @@ -141,6 +145,8 @@ pandas 0.11.1
``to_pickle`` instance method, ``save`` and ``load`` will give deprecation warning.
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
deprecated
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
deprecated
- Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_)
- ``as_matrix`` with mixed signed and unsigned dtypes will result in 2 x the lcd of the unsigned
as an int, maxing with ``int64``, to avoid precision issues (GH3733_)
Expand Down Expand Up @@ -236,6 +242,8 @@ pandas 0.11.1
- Fix incorrect arguments passed to concat that are not list-like (e.g. concat(df1,df2)) (GH3481_)
- Correctly parse when passed the ``dtype=str`` (or other variable-len string dtypes) in ``read_csv`` (GH3795_)
- Fix index name not propagating when using ``loc/ix`` (GH3880_)
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
in the ``to_replace`` argument wasn't working (GH3907_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
Expand Down Expand Up @@ -331,6 +339,7 @@ pandas 0.11.1
.. _GH3873: https://github.com/pydata/pandas/issues/3873
.. _GH3877: https://github.com/pydata/pandas/issues/3877
.. _GH3880: https://github.com/pydata/pandas/issues/3880
.. _GH3907: https://github.com/pydata/pandas/issues/3907


pandas 0.11.0
Expand Down
6 changes: 6 additions & 0 deletions doc/source/v0.11.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ API changes
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
deprecated

- The ``infer_types`` parameter of ``DataFrame.replace`` has been removed;
``replace`` now performs type conversion by default (GH3907_)

- Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
to be inserted if ``True``, default is ``False`` (same as prior to 0.11.1) (GH3679_)
- Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_)
Expand Down Expand Up @@ -356,6 +359,8 @@ Bug Fixes

- ``DataFrame.from_records`` did not accept empty recarrays (GH3682_)
- ``read_html`` now correctly skips tests (GH3741_)
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
in the ``to_replace`` argument wasn't working (GH3907_)

See the `full release notes
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
Expand Down Expand Up @@ -410,3 +415,4 @@ on GitHub for a complete list.
.. _GH3877: https://github.com/pydata/pandas/issues/3877
.. _GH3659: https://github.com/pydata/pandas/issues/3659
.. _GH3679: https://github.com/pydata/pandas/issues/3679
.. _GH3907: https://github.com/pydata/pandas/issues/3907
16 changes: 15 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import itertools
import re
from datetime import datetime

from numpy.lib.format import read_array, write_array
Expand Down Expand Up @@ -1585,8 +1586,21 @@ def is_complex_dtype(arr_or_dtype):
return issubclass(tipo, np.complexfloating)


def is_re(obj):
    """
    Check whether ``obj`` is a compiled regular expression pattern.

    Parameters
    ----------
    obj : object
        The object to check.

    Returns
    -------
    bool
        ``True`` if ``obj`` is an instance of the type returned by
        ``re.compile``, ``False`` otherwise.
    """
    # ``re._pattern_type`` is a private alias that is not available on every
    # Python version; deriving the type from an actual compiled pattern works
    # regardless of how the ``re`` module happens to name it.
    return isinstance(obj, type(re.compile('')))


def is_re_compilable(obj):
    """
    Check whether ``obj`` can be handed to ``re.compile``.

    Parameters
    ----------
    obj : object
        The object to check.

    Returns
    -------
    bool
        ``False`` if ``re.compile(obj)`` raises ``TypeError``, ``True`` if it
        succeeds.  Any other exception raised by ``re.compile`` is not
        caught here and propagates to the caller.
    """
    try:
        re.compile(obj)
        return True
    except TypeError:
        return False


def is_list_like(arg):
    """
    Check whether ``arg`` is an iterable container rather than a scalar.

    Parameters
    ----------
    arg : object
        The object to check.

    Returns
    -------
    bool
        ``True`` if ``arg`` exposes ``__iter__`` and is not a string,
        ``False`` otherwise.
    """
    # Strings are rejected explicitly: they should be treated as scalar
    # values here even when they are iterable.
    return hasattr(arg, '__iter__') and not isinstance(arg, basestring)

def _is_sequence(x):
try:
Expand Down
19 changes: 7 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
_maybe_convert_indices)
from pandas.core.internals import (BlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks,
_re_compilable)
create_block_manager_from_blocks)
from pandas.core.series import Series, _radd_compat
import pandas.core.expressions as expressions
from pandas.compat.scipy import scoreatpercentile as _quantile
Expand Down Expand Up @@ -3483,7 +3482,7 @@ def bfill(self, axis=0, inplace=False, limit=None):
limit=limit)

def replace(self, to_replace=None, value=None, inplace=False, limit=None,
regex=False, infer_types=False, method=None, axis=None):
regex=False, method=None, axis=None):
"""
Replace values given in 'to_replace' with 'value'.

Expand Down Expand Up @@ -3545,8 +3544,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
string. Otherwise, `to_replace` must be ``None`` because this
parameter will be interpreted as a regular expression or a list,
dict, or array of regular expressions.
infer_types : bool, default True
If ``True`` attempt to convert object blocks to a better dtype.

See also
--------
Expand Down Expand Up @@ -3582,7 +3579,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
and play with this method to gain intuition about how it works.

"""
if not isinstance(regex, bool) and to_replace is not None:
if not com.is_bool(regex) and to_replace is not None:
raise AssertionError("'to_replace' must be 'None' if 'regex' is "
"not a bool")
if method is not None:
Expand Down Expand Up @@ -3628,8 +3625,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
to_replace, value = keys, values

return self.replace(to_replace, value, inplace=inplace,
limit=limit, regex=regex,
infer_types=infer_types)
limit=limit, regex=regex)
else:
if not len(self.columns):
return self
Expand Down Expand Up @@ -3673,14 +3669,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
new_data = self._data.replace(to_replace, value,
inplace=inplace, regex=regex)
elif to_replace is None:
if not (_re_compilable(regex) or
if not (com.is_re_compilable(regex) or
isinstance(regex, (list, dict, np.ndarray, Series))):
raise TypeError("'regex' must be a string or a compiled "
"regular expression or a list or dict of "
"strings or regular expressions, you "
"passed a {0}".format(type(regex)))
return self.replace(regex, value, inplace=inplace, limit=limit,
regex=True, infer_types=infer_types)
regex=True)
else:

# dest iterable dict-like
Expand All @@ -3701,8 +3697,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
raise TypeError('Invalid "to_replace" type: '
'{0}'.format(type(to_replace))) # pragma: no cover

if infer_types:
new_data = new_data.convert()
new_data = new_data.convert(copy=not inplace, convert_numeric=False)

if inplace:
self._data = new_data
Expand Down
33 changes: 16 additions & 17 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import itertools
import re
from datetime import datetime
import collections

from numpy import nan
import numpy as np

from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE,
_TD_DTYPE)
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_handle_legacy_indexes)
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
import pandas.core.common as com
import pandas.lib as lib
Expand All @@ -18,10 +19,6 @@
from pandas.util import py3compat


def _re_compilable(ex):
return isinstance(ex, (basestring, re._pattern_type))


class Block(object):
"""
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
Expand Down Expand Up @@ -744,14 +741,16 @@ def should_store(self, value):
def replace(self, to_replace, value, inplace=False, filter=None,
regex=False):
blk = [self]
to_rep_is_list = (isinstance(to_replace, collections.Iterable) and not
isinstance(to_replace, basestring))
value_is_list = (isinstance(value, collections.Iterable) and not
isinstance(to_replace, basestring))
to_rep_is_list = com.is_list_like(to_replace)
value_is_list = com.is_list_like(value)
both_lists = to_rep_is_list and value_is_list
either_list = to_rep_is_list or value_is_list

if not either_list and not regex:
if not either_list and com.is_re(to_replace):
blk[0], = blk[0]._replace_single(to_replace, value,
inplace=inplace, filter=filter,
regex=True)
elif not (either_list or regex):
blk = super(ObjectBlock, self).replace(to_replace, value,
inplace=inplace,
filter=filter, regex=regex)
Expand All @@ -773,15 +772,18 @@ def replace(self, to_replace, value, inplace=False, filter=None,
def _replace_single(self, to_replace, value, inplace=False, filter=None,
regex=False):
# to_replace is regex compilable
to_rep_re = _re_compilable(to_replace)
to_rep_re = com.is_re_compilable(to_replace)

# regex is regex compilable
regex_re = _re_compilable(regex)
regex_re = com.is_re_compilable(regex)

# only one will survive
if to_rep_re and regex_re:
raise AssertionError('only one of to_replace and regex can be '
'regex compilable')

# if regex was passed as something that can be a regex (rather than a
# boolean)
if regex_re:
to_replace = regex

Expand Down Expand Up @@ -1668,7 +1670,6 @@ def get(self, item):
mgr._consolidate_inplace()
return mgr


def iget(self, i):
item = self.items[i]
if self.items.is_unique:
Expand Down Expand Up @@ -1970,7 +1971,6 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=np.nan):
def _reindex_indexer_items(self, new_items, indexer, fill_value):
# TODO: less efficient than I'd like

is_unique = self.items.is_unique
item_order = com.take_1d(self.items.values, indexer)

# keep track of what items aren't found anywhere
Expand Down Expand Up @@ -2141,7 +2141,6 @@ def rename_axis(self, mapper, axis=1):

def rename_items(self, mapper, copydata=True):
new_items = Index([mapper(x) for x in self.items])
is_unique = new_items.is_unique

new_blocks = []
for block in self.blocks:
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime
import sys
import re

import nose
import unittest
Expand Down Expand Up @@ -244,6 +245,18 @@ def test_groupby():
assert v == expected[k]


def test_is_list_like():
    # containers (including Series and the ``.str`` accessor) must be
    # accepted, while scalars and plain strings must be rejected
    list_likes = ([], [1], (1,), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
                  Series([]), Series(['a']).str)
    scalars = (1, '2', object())

    for candidate in list_likes:
        assert com.is_list_like(candidate)

    for candidate in scalars:
        assert not com.is_list_like(candidate)


def test_ensure_int32():
values = np.arange(10, dtype=np.int32)
result = com._ensure_int32(values)
Expand Down Expand Up @@ -288,6 +301,30 @@ def test_ensure_platform_int():
# expected = u"\u05d0".encode('utf-8')
# assert (result == expected)


def test_is_re():
    # only compiled patterns count; strings, numbers and arbitrary objects
    # do not
    compiled = (re.compile('ad'),)
    not_compiled = ('x', 2, 3, object())

    for candidate in compiled:
        assert com.is_re(candidate)

    for candidate in not_compiled:
        assert not com.is_re(candidate)


def test_is_recompilable():
passes = (r'a', u'x', r'asdf', re.compile('adsf'), ur'\u2233\s*',
re.compile(r''))
fails = 1, [], object()

for p in passes:
assert com.is_re_compilable(p)

for f in fails:
assert not com.is_re_compilable(f)


class TestTake(unittest.TestCase):

_multiprocess_can_split_ = True
Expand Down
40 changes: 39 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6696,7 +6696,7 @@ def test_regex_replace_list_to_scalar(self):
res3 = df.copy()
res2.replace([r'\s*\.\s*', 'a|b'], nan, regex=True, inplace=True)
res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=nan, inplace=True)
expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4, object),
expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4),
'c': [nan, nan, nan, 'd']})
assert_frame_equal(res, expec)
assert_frame_equal(res2, expec)
Expand Down Expand Up @@ -6772,6 +6772,30 @@ def test_replace(self):
df = DataFrame(index=['a', 'b'])
assert_frame_equal(df, df.replace(5, 7))

def test_replace_list(self):
    # replace a list of values with a list of replacements, pairing the
    # entries of both lists position by position
    frame = DataFrame({'a': list('ab..'), 'b': list('efgh'),
                       'c': list('helo')})

    # [v1, v2, ..., vN] -> [r1, r2, ..., rN] where one replacement is NaN
    result = frame.replace([r'.', r'e'], [nan, 'crap'])
    expected = DataFrame({'a': ['a', 'b', nan, nan],
                          'b': ['crap', 'f', 'g', 'h'],
                          'c': ['h', 'crap', 'l', 'o']})
    assert_frame_equal(result, expected)

    # [v1, v2, ..., vN] -> [r1, r2, ..., rN] where all replacements are
    # strings
    result = frame.replace([r'.', r'f'], [r'..', r'crap'])
    expected = DataFrame({'a': ['a', 'b', '..', '..'],
                          'b': ['e', 'crap', 'g', 'h'],
                          'c': ['h', 'e', 'l', 'o']})
    assert_frame_equal(result, expected)

def test_replace_series_dict(self):
# from GH 3064
df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}})
Expand All @@ -6792,10 +6816,24 @@ def test_replace_series_dict(self):
result = df.replace(s, df.mean())
assert_frame_equal(result, expected)

def test_replace_convert(self):
    # gh 3907: after mapping every string to an integer the resulting
    # columns are expected to report an integer dtype
    frame = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
    mapping = {'foo': 1, 'bar': 2, 'bah': 3}
    replaced = frame.replace(mapping)
    expected_dtypes = Series([np.int_] * 3)
    assert_series_equal(expected_dtypes, replaced.dtypes)

def test_replace_mixed(self):
self.mixed_frame['foo'][5:20] = nan
self.mixed_frame['A'][-10:] = nan

result = self.mixed_frame.replace(np.nan, -18)
expected = self.mixed_frame.fillna(value=-18)
assert_frame_equal(result, expected)
assert_frame_equal(result.replace(-18, nan), self.mixed_frame)

result = self.mixed_frame.replace(np.nan, -1e8)
expected = self.mixed_frame.fillna(value=-1e8)
assert_frame_equal(result, expected)
Expand Down