Back

Explore Courses Blog Tutorials Interview Questions
0 votes
2 views
in Data Science by (17.6k points)

I'm doing some code practice and applying merging of data frames while doing this getting user warning

/usr/lib64/python2.7/site-packages/pandas/core/frame.py:6201: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=True'. To retain the current behavior and silence the warning, pass sort=False

On these lines of code: Can you please help to get the solution of this warning.

placement_video = [self.read_sql_vdx_summary, self.read_sql_video_km]

placement_video_summary = reduce(lambda left, right: pd.merge(left, right, on='PLACEMENT', sort=False), placement_video)

placement_by_video = placement_video_summary.loc[:, ["PLACEMENT", "PLACEMENT_NAME", "COST_TYPE", "PRODUCT",

                                                     "VIDEONAME", "VIEW0", "VIEW25", "VIEW50", "VIEW75",

                                                     "VIEW100",

                                                     "ENG0", "ENG25", "ENG50", "ENG75", "ENG100", "DPE0",

                                                     "DPE25",

                                                     "DPE50", "DPE75", "DPE100"]]

# print (placement_by_video)

placement_by_video["Placement# Name"] = placement_by_video[["PLACEMENT",

                                                            "PLACEMENT_NAME"]].apply(lambda x: ".".join(x),

                                                                                     axis=1)

placement_by_video_new = placement_by_video.loc[:,

                         ["PLACEMENT", "Placement# Name", "COST_TYPE", "PRODUCT", "VIDEONAME",

                          "VIEW0", "VIEW25", "VIEW50", "VIEW75", "VIEW100",

                          "ENG0", "ENG25", "ENG50", "ENG75", "ENG100", "DPE0", "DPE25",

                          "DPE50", "DPE75", "DPE100"]]

placement_by_km_video = [placement_by_video_new, self.read_sql_km_for_video]

placement_by_km_video_summary = reduce(lambda left, right: pd.merge(left, right, on=['PLACEMENT', 'PRODUCT'], sort=False),

                                       placement_by_km_video)

#print (list(placement_by_km_video_summary))

#print(placement_by_km_video_summary)

#exit()

# print(placement_by_video_new)

"""Conditions for 25%view"""

mask17 = placement_by_km_video_summary["PRODUCT"].isin(['Display', 'Mobile'])

mask18 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM", "CPCV"])

mask19 = placement_by_km_video_summary["PRODUCT"].isin(["InStream"])

mask20 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM", "CPE+", "CPCV"])

mask_video_video_completions = placement_by_km_video_summary["COST_TYPE"].isin(["CPCV"])

mask21 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE+"])

mask22 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM"])

mask23 = placement_by_km_video_summary["PRODUCT"].isin(['Display', 'Mobile', 'InStream'])

mask24 = placement_by_km_video_summary["COST_TYPE"].isin(["CPE", "CPM", "CPE+"])

choice25video_eng = placement_by_km_video_summary["ENG25"]

choice25video_vwr = placement_by_km_video_summary["VIEW25"]

choice25video_deep = placement_by_km_video_summary["DPE25"]

placement_by_km_video_summary["25_pc_video"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21],

                                                  [choice25video_eng, choice25video_vwr, choice25video_deep])

"""Conditions for 50%view"""

choice50video_eng = placement_by_km_video_summary["ENG50"]

choice50video_vwr = placement_by_km_video_summary["VIEW50"]

choice50video_deep = placement_by_km_video_summary["DPE50"]

placement_by_km_video_summary["50_pc_video"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21],

                                                  [choice50video_eng,

                                                   choice50video_vwr, choice50video_deep])

"""Conditions for 75%view"""

choice75video_eng = placement_by_km_video_summary["ENG75"]

choice75video_vwr = placement_by_km_video_summary["VIEW75"]

choice75video_deep = placement_by_km_video_summary["DPE75"]

placement_by_km_video_summary["75_pc_video"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21],

                                                  [choice75video_eng,

                                                   choice75video_vwr,

                                                   choice75video_deep])

"""Conditions for 100%view"""

choice100video_eng = placement_by_km_video_summary["ENG100"]

choice100video_vwr = placement_by_km_video_summary["VIEW100"]

choice100video_deep = placement_by_km_video_summary["DPE100"]

choicecompletions = placement_by_km_video_summary['COMPLETIONS']

placement_by_km_video_summary["100_pc_video"] = np.select([mask17 & mask22, mask19 & mask24, mask17 & mask21, mask23 & mask_video_video_completions],

                                                          [choice100video_eng, choice100video_vwr, choice100video_deep, choicecompletions])

"""conditions for 0%view"""

choice0video_eng = placement_by_km_video_summary["ENG0"]

choice0video_vwr = placement_by_km_video_summary["VIEW0"]

choice0video_deep = placement_by_km_video_summary["DPE0"]

placement_by_km_video_summary["Views"] = np.select([mask17 & mask18, mask19 & mask20, mask17 & mask21],

                                                   [choice0video_eng,

                                                    choice0video_vwr,

                                                    choice0video_deep])

#print (placement_by_km_video_summary)

#exit()

#final Table

placement_by_video_summary = placement_by_km_video_summary.loc[:,

                             ["PLACEMENT", "Placement# Name", "PRODUCT", "VIDEONAME", "COST_TYPE",

                              "Views", "25_pc_video", "50_pc_video", "75_pc_video","100_pc_video",

                              "ENGAGEMENTS","IMPRESSIONS", "DPEENGAMENTS"]]

#placement_by_km_video = [placement_by_video_summary, self.read_sql_km_for_video]

#placement_by_km_video_summary = reduce(lambda left, right: pd.merge(left, right, on=['PLACEMENT', 'PRODUCT']),

                                       #placement_by_km_video)

#print(placement_by_video_summary)

#exit()

# dup_col =["IMPRESSIONS","ENGAGEMENTS","DPEENGAMENTS"]

# placement_by_video_summary.loc[placement_by_video_summary.duplicated(dup_col),dup_col] = np.nan

# print ("Dhar",placement_by_video_summary)

'''adding views based on conditions'''

#filter maximum value from videos

placement_by_video_summary_new = placement_by_km_video_summary.loc[

    placement_by_km_video_summary.reset_index().groupby(['PLACEMENT', 'PRODUCT'])['Views'].idxmax()]

#print (placement_by_video_summary_new)

#exit()

# print (placement_by_video_summary_new)

# mask22 = (placement_by_video_summary_new.PRODUCT.str.upper ()=='DISPLAY') & (placement_by_video_summary_new.COST_TYPE=='CPE')

placement_by_video_summary_new.loc[mask17 & mask18, 'Views'] = placement_by_video_summary_new['ENGAGEMENTS']

placement_by_video_summary_new.loc[mask19 & mask20, 'Views'] = placement_by_video_summary_new['IMPRESSIONS']

placement_by_video_summary_new.loc[mask17 & mask21, 'Views'] = placement_by_video_summary_new['DPEENGAMENTS']

#print (placement_by_video_summary_new)

#exit()

placement_by_video_summary = placement_by_video_summary.drop(placement_by_video_summary_new.index).append(

    placement_by_video_summary_new).sort_index()

placement_by_video_summary["Video Completion Rate"] = placement_by_video_summary["100_pc_video"] / \

                                                      placement_by_video_summary["Views"]

placement_by_video_final = placement_by_video_summary.loc[:,

                           ["Placement# Name", "PRODUCT", "VIDEONAME", "Views",

                            "25_pc_video", "50_pc_video", "75_pc_video", "100_pc_video",

                            "Video Completion Rate"]]

1 Answer

0 votes
by (41.4k points)
edited by

This below code will resolve the issue:

from pandas import DataFrame, concat

a = DataFrame([{'a':1,      'c':2,'d':3 }])

b = DataFrame([{'a':4,'b':5,      'd':6,'e':7}])

>>> concat([a,b],sort=False)

   a    c d   b e

0  1 2.0  3 NaN NaN

0  4 NaN  6 5.0 7.0

>>> concat([a,b],sort=True)

   a    b c  d e

0  1 NaN  2.0 3 NaN

0  4 5.0  NaN 6 7.0

If you want to learn Python proogramming language for Data Science then you can watch this complete video tutorial:

Related questions

0 votes
1 answer
0 votes
1 answer
asked Aug 31, 2019 in Data Science by sourav (17.6k points)
0 votes
1 answer
0 votes
1 answer
0 votes
2 answers
asked Oct 10, 2019 in Python by Sammy (47.6k points)

Browse Categories

...