-
Notifications
You must be signed in to change notification settings - Fork 67
Expand file tree
/
Copy pathtest_dataframe.py
More file actions
245 lines (190 loc) · 8.31 KB
/
test_dataframe.py
File metadata and controls
245 lines (190 loc) · 8.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import google.cloud.bigquery
import pandas as pd
import pytest
import bigframes.dataframe
import bigframes.session
from bigframes.testing import mocks
def test_dataframe_dropna_axis_1_subset_not_implememented(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that dropna(axis=1) combined with subset raises NotImplementedError.

    The error message is expected to mention "subset".
    """
    df = mocks.create_dataframe(monkeypatch)
    expect_unsupported = pytest.raises(NotImplementedError, match="subset")

    with expect_unsupported:
        df.dropna(axis=1, subset=["col1", "col2"])
def test_dataframe_repr_with_uninitialized_object():
    """Check that repr() works on a DataFrame whose __init__ has not run.

    This simulates a visual debugger pausing inside DataFrame.__init__ and
    rendering the partially constructed object, which must not crash.

    Regression test for https://github.com/googleapis/python-bigquery-dataframes/issues/728
    """
    # Allocate the instance with __new__ so that __init__ is skipped, as if a
    # debugger had paused before initialization finished.
    # https://stackoverflow.com/a/6384982/101923
    frame_cls = bigframes.dataframe.DataFrame
    uninitialized = frame_cls.__new__(frame_cls)

    assert "DataFrame" in repr(uninitialized)
@pytest.mark.parametrize(
    "rule",
    (
        pd.DateOffset(weeks=1),
        pd.Timedelta(hours=8),
        # Per the pandas documentation at
        # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
        # the aliases below all default to "right" for closed and label,
        # which isn't yet supported.
        "ME",
        "YE",
        "QE",
        "BME",
        "BA",
        "BQE",
        "W",
    ),
)
def test_dataframe_rule_not_implememented(
    monkeypatch: pytest.MonkeyPatch,
    rule,
):
    """Check that resample() rejects each parametrized rule.

    Every rule is expected to raise NotImplementedError with a message
    mentioning "rule".
    """
    df = mocks.create_dataframe(monkeypatch)
    expect_unsupported = pytest.raises(NotImplementedError, match="rule")

    with expect_unsupported:
        df.resample(rule=rule)
def test_dataframe_closed_not_implememented(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that resample(closed="right") raises NotImplementedError."""
    df = mocks.create_dataframe(monkeypatch)
    expect_unsupported = pytest.raises(
        NotImplementedError, match="Only closed='left'"
    )

    with expect_unsupported:
        df.resample(rule="1d", closed="right")
def test_dataframe_label_not_implememented(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that resample(label="right") raises NotImplementedError."""
    df = mocks.create_dataframe(monkeypatch)
    expect_unsupported = pytest.raises(
        NotImplementedError, match="Only label='left'"
    )

    with expect_unsupported:
        df.resample(rule="1d", label="right")
@pytest.mark.parametrize(
    "origin",
    (
        "end",
        "end_day",
    ),
)
def test_dataframe_origin_not_implememented(
    monkeypatch: pytest.MonkeyPatch,
    origin,
):
    """Check that resample() rejects the parametrized origin values.

    Each value is expected to raise NotImplementedError with a message
    mentioning "origin".
    """
    df = mocks.create_dataframe(monkeypatch)
    expect_unsupported = pytest.raises(NotImplementedError, match="origin")

    with expect_unsupported:
        df.resample(rule="1d", origin=origin)
def test_dataframe_setattr_with_uninitialized_object():
    """Check that attributes can be set on a DataFrame before __init__ runs.

    Ensures DataFrame can be subclassed without attribute assignment being
    treated as an attempt to set a column.
    """
    # Skip __init__ on purpose: a subclass might only call it later.
    # https://stackoverflow.com/a/6384982/101923
    frame_cls = bigframes.dataframe.DataFrame
    uninitialized = frame_cls.__new__(frame_cls)
    expected = "my-test-value"

    uninitialized.lineage = expected

    # The value should round-trip as a regular attribute.
    assert uninitialized.lineage == expected
def test_dataframe_to_gbq_invalid_destination(monkeypatch: pytest.MonkeyPatch):
    """Check that to_gbq() rejects a destination lacking a dataset or project.

    The ValueError message is expected to echo the offending destination.
    """
    bad_destination = "no_dataset_or_project"
    df = mocks.create_dataframe(monkeypatch)

    with pytest.raises(ValueError, match=bad_destination):
        df.to_gbq(bad_destination)
def test_dataframe_to_gbq_invalid_if_exists(monkeypatch: pytest.MonkeyPatch):
    """Check that to_gbq() raises ValueError for an unknown if_exists value.

    The error message is expected to include the rejected value.
    """
    df = mocks.create_dataframe(monkeypatch)
    expect_invalid = pytest.raises(ValueError, match="notreallyanoption")

    with expect_invalid:
        # The parameter is annotated with the accepted literals, but users
        # (particularly in interactive notebooks) may not run a type checker,
        # so the runtime check has to catch bad values too.
        df.to_gbq(if_exists="notreallyanoption")  # type: ignore
def test_dataframe_to_gbq_invalid_if_exists_no_destination(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that to_gbq(if_exists="append") without a destination is rejected.

    The ValueError message is expected to mention "append".
    """
    df = mocks.create_dataframe(monkeypatch)
    expect_invalid = pytest.raises(ValueError, match="append")

    with expect_invalid:
        df.to_gbq(if_exists="append")
def test_dataframe_to_gbq_writes_to_anonymous_dataset(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that to_gbq() with no destination targets the anonymous dataset.

    The returned destination must start with the ID of the anonymous dataset
    that the mocked session was created with.
    """
    dataset_id = "my-anonymous-project.my_anonymous_dataset"
    session = mocks.create_bigquery_session(
        anonymous_dataset=google.cloud.bigquery.DatasetReference.from_string(
            dataset_id
        )
    )
    df = mocks.create_dataframe(monkeypatch, session=session)

    written_to = df.to_gbq()

    assert written_to.startswith(dataset_id)
def test_dataframe_rename_columns(monkeypatch: pytest.MonkeyPatch):
    """Check that rename(columns=...) returns a frame with the new labels."""
    old_labels = ["col1", "col2", "col3"]
    new_labels = ["a", "b", "c"]
    df = mocks.create_dataframe(
        monkeypatch, data={label: [] for label in old_labels}
    )
    assert df.columns.to_list() == old_labels

    # Maps col1 -> a, col2 -> b, col3 -> c.
    mapping = dict(zip(old_labels, new_labels))
    result = df.rename(columns=mapping)

    assert result.columns.to_list() == new_labels
def test_dataframe_rename_columns_inplace_returns_none(monkeypatch: pytest.MonkeyPatch):
    """Check that rename(columns=..., inplace=True) mutates and returns None."""
    old_labels = ["col1", "col2", "col3"]
    new_labels = ["a", "b", "c"]
    df = mocks.create_dataframe(
        monkeypatch, data={label: [] for label in old_labels}
    )
    assert df.columns.to_list() == old_labels

    # Maps col1 -> a, col2 -> b, col3 -> c.
    mapping = dict(zip(old_labels, new_labels))
    result = df.rename(columns=mapping, inplace=True)

    assert result is None
    assert df.columns.to_list() == new_labels
def test_dataframe_rename_axis(monkeypatch: pytest.MonkeyPatch):
    """Check that rename_axis() returns a frame with renamed index levels."""
    index_labels = ["index1", "index2"]
    unindexed = mocks.create_dataframe(
        monkeypatch, data={"index1": [], "index2": [], "col1": [], "col2": []}
    )
    df = unindexed.set_index(index_labels)
    assert list(df.index.names) == ["index1", "index2"]

    result = df.rename_axis(["a", "b"])

    assert list(result.index.names) == ["a", "b"]
def test_dataframe_rename_axis_inplace_returns_none(monkeypatch: pytest.MonkeyPatch):
    """Check that rename_axis(..., inplace=True) mutates and returns None."""
    index_labels = ["index1", "index2"]
    unindexed = mocks.create_dataframe(
        monkeypatch, data={"index1": [], "index2": [], "col1": [], "col2": []}
    )
    df = unindexed.set_index(index_labels)
    assert list(df.index.names) == ["index1", "index2"]

    result = df.rename_axis(["a", "b"], inplace=True)

    assert result is None
    assert list(df.index.names) == ["a", "b"]
def test_dataframe_drop_columns_inplace_returns_none(monkeypatch: pytest.MonkeyPatch):
    """Check that drop(columns=..., inplace=True) mutates and returns None."""
    source_data = {"col1": [1], "col2": [2], "col3": [3]}
    df = mocks.create_dataframe(monkeypatch, data=source_data)
    assert df.columns.to_list() == ["col1", "col2", "col3"]

    result = df.drop(columns=["col1", "col3"], inplace=True)

    assert result is None
    assert df.columns.to_list() == ["col2"]
def test_dataframe_drop_index_inplace_returns_none(
    # Drop index depends on the actual data, not just metadata, so use the
    # local engine for more robust testing.
    polars_session: bigframes.session.Session,
):
    """Check that drop(index=..., inplace=True) mutates and returns None."""
    local_frame = pd.DataFrame({"col1": [1, 2, 3], "index_col": [0, 1, 2]})
    df = polars_session.read_pandas(local_frame.set_index("index_col"))
    assert df.index.to_list() == [0, 1, 2]

    result = df.drop(index=[0, 2], inplace=True)

    assert result is None
    assert df.index.to_list() == [1]
def test_dataframe_drop_columns_returns_new_dataframe(monkeypatch: pytest.MonkeyPatch):
    """Check that drop(columns=...) returns a new frame and leaves the source intact."""
    all_columns = ["col1", "col2", "col3"]
    source_data = {"col1": [1], "col2": [2], "col3": [3]}
    df = mocks.create_dataframe(monkeypatch, data=source_data)
    assert df.columns.to_list() == all_columns

    result = df.drop(columns=["col1", "col3"])

    # The original frame must keep every column; only the result is trimmed.
    assert df.columns.to_list() == all_columns
    assert result.columns.to_list() == ["col2"]
def test_dataframe_semantics_property_future_warning(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that reading DataFrame.semantics emits a FutureWarning.

    The property is read with the "experiments.semantic_operators" option
    set to True.
    """
    df = mocks.create_dataframe(monkeypatch)

    with bigframes.option_context("experiments.semantic_operators", True):
        with pytest.warns(FutureWarning):
            # Only the attribute access matters; the value is discarded.
            _ = df.semantics
def test_dataframe_ai_property_future_warning(
    monkeypatch: pytest.MonkeyPatch,
):
    """Check that reading DataFrame.ai emits a FutureWarning."""
    df = mocks.create_dataframe(monkeypatch)
    expect_warning = pytest.warns(FutureWarning)

    with expect_warning:
        # Only the attribute access matters; the value is discarded.
        _ = df.ai