def get_iphonex():
    """Estimate and print total iPhone X units sold and total sales money.

    Reads every document of the ``items`` collection in the ``iphonex``
    MongoDB database (default ``pymongo.MongoClient()`` connection) and keeps
    the listings whose ``raw_title`` contains "iphonex" or "iphone x" but
    neither "iphone8" nor "iphone 8" (case-insensitive).  Listings are
    de-duplicated by ``nid`` (last occurrence wins) and restricted to those
    priced above 6000 that have a non-zero sales figure or a non-zero
    comment count.

    Listings whose sales figure is unknown (field missing, or no leading
    digits in ``view_sales``) get an estimate from a through-the-origin
    linear regression of sales on comment count, fitted on the listings
    with more than 200 comments and positive sales.

    Prints two lines (units sold, money) and returns ``None``.
    """
    # Compile once, outside the per-document loop.
    wanted_title = re.compile(r"iphone ?x", re.I)
    unwanted_title = re.compile(r"iphone ?8", re.I)
    leading_digits = re.compile(r"(\d+)")

    columns = ["price", "sales", "urls", "titles", "nids", "comment_count"]
    rows = []

    client = pymongo.MongoClient()
    try:
        for item in client["iphonex"]["items"].find():
            title = item["raw_title"]
            if not wanted_title.search(title) or unwanted_title.search(title):
                continue

            matched = leading_digits.match(item.get("view_sales", "0"))
            rows.append({
                "price": float(item.get("view_price", "0")),
                # An unparseable sales string is treated like a missing one
                # (0 = unknown, estimated later).  The previous -1 sentinel
                # slipped through the filters and was subtracted from the
                # totals.
                "sales": int(matched.group(1)) if matched else 0,
                "urls": item["detail_url"],
                "titles": title,
                "nids": item["nid"],
                "comment_count": int(item.get("comment_count") or 0),
            })
    finally:
        # Release the connection even if a document is malformed.
        client.close()

    try:
        pd.set_option("display.max_colwidth", None)
    except ValueError:
        # pandas < 1.0 only accepts an integer here; -1 meant "no limit".
        pd.set_option("display.max_colwidth", -1)

    df = pd.DataFrame(rows, columns=columns)
    df = df.drop_duplicates(subset="nids", keep="last")

    # Build one combined mask instead of chaining df[...][...], which makes
    # pandas re-align a mask computed on the parent frame.
    has_signal = (df.sales != 0) | (df.comment_count != 0)
    df_test = df[(df.price > 6000) & has_signal].copy()
    # Predictions are floats; convert up front so the assignment below does
    # not have to change the column dtype.
    df_test["sales"] = df_test["sales"].astype(float)

    needs_estimate = df_test.sales == 0
    if needs_estimate.any():
        df_train = df_test[(df_test.comment_count > 200) & (df_test.sales > 0)]
        # As before, fitting with no training rows is left to fail inside
        # scikit-learn rather than silently reporting zeros.
        reg = sklearn.linear_model.LinearRegression(fit_intercept=False)
        reg.fit(df_train[["comment_count"]], df_train["sales"])
        df_test.loc[needs_estimate, "sales"] = reg.predict(
            df_test.loc[needs_estimate, ["comment_count"]]
        )

    # Single-argument print() gives the same output on Python 2 and 3.
    print("iPhone X sold number: %s" % df_test.sales.sum())
    print("iPhone X sold money: %s" % (df_test.sales * df_test.price).sum())


# Run the report only when executed as a script, not on import.
if __name__ == "__main__":
    get_iphonex()