0 votes
1 view
in Azure by (5.4k points)

Background: I am working on a project that aims to classify product reviews into positive and negative using Sentiment Analysis in Azure ML. I got stuck when I was classifying reviews into different departments.

I am basically reading words from CSV files and checking whether the review(v: list of sentences) contains these words. If some of these words are found in the review then I am noting the sentence number and pushing it into respective lists( FinanceList, QualityList, LogisticsList ). In the end, I am converting the lists to strings and pushing them into a data frame.

The output is not getting logged for the print statements that I have written in the script in Azure ML.

The values in the data frame are always turning out to be 0 but when I run the code locally I get the expected output.

Description of First Image: The columns of the data frame showing 0 values.

Description of the Second Image: I have highlighted the expected output that I got locally for the same review which was used in AzureML.

Image 1

Image 2

The things that I have already checked:

  1. The CSV files are read properly.
  2. The review contains the words that I am searching for.

I am unable to understand where I am going wrong.

import csv

import math

import pandas as pd

import numpy as np

def azureml_main( data, ud):

   FinanceDept = []

   LogisticsDept = []

   QualityDept = []

  #Reading from the csv files

   with open('.\Script Bundle\\quality1.csv', 'rb') as fin:

      reader = csv.reader(fin)

      QualityDept = list(reader)

   with open('.\Script Bundle\\finance1.csv', 'rb') as f:

      reader = csv.reader(f)

      FinanceDept = list(reader)

   with open('.\Script Bundle\\logistics1.csv', 'rb') as f:

      reader = csv.reader(f)

      LogisticDept = list(reader)

   FinanceList = []

   LogisticsList = []

   QualityList = []

#Initializing the Lists   

   FinanceList.append(0)

   LogisticsList.append(0)

   QualityList.append(0)

   rev = data['Data']

   v = rev[0].split('.')

   print FinanceDept

   S = 0   

   for sentence in v:

      S = S + 1

      z = sentence.split(' ')

      for c in z:

         c = c.lower()

         if c in FinanceDept and S not in FinanceList:

            FinanceList.append(S)

         if c in LogisticsDept and S not in LogisticsList:

            LogisticsList.append(S)

         if c in QualityDept and S not in QualityList:

            QualityList.append(S)

   #Compute User Reputation Score

   Upvotes = int(ud['upvotes'].tolist()[0])

   Downvotes = int(ud['downvotes'].tolist()[0])

   TotalVotes = max(1,Upvotes+Downvotes)

   q = data['Score']

   print FinanceList

   repScore = float(Upvotes)/TotalVotes 

   repScore = repScore*float( q[0] )

   str1 = ','.join(str(e) for e in FinanceList) 

   str2 = ','.join(str(e) for e in QualityList)

   str3 = ','.join(str(e) for e in LogisticsList)

   x = ud['id']

   #df = pd.DataFrame(  [str(repScore), str1  , str2  , str3 ], columns=[Write the columns])

   d = {'id': x[0], 'Score': float(repScore),'Logistics':str3,'Finance':str1,'Quality':str2}

   df = pd.DataFrame(data=d, index=np.arange(1))

   return df

1 Answer

0 votes
by (8.7k points)

Here is a model for predicting review score of movie reviews. You can take inspiration from this for simpler method: Click here

If you want to write a python script, here is a code that classifies text from two csv sheets stored in a zip file:

import csv

import numpy as np

import pandas as pd

def azureml_main(dataframe1 = None, dataframe2 = None):

    # Execution logic goes here

    #print('Input pandas.DataFrame #1:\r\n\r\n{0}'.format(dataframe1))

    # If a zip file is connected to the third input port is connected,

    # it is unzipped under ".\Script Bundle". This directory is added

    # to sys.path. Therefore, if your zip file contains a Python file

    # mymodule.py you can import it using:

    # import mymodule

    apple = {}

    microsoft = {}

  #Reading from the csv files

    with open('.\Script Bundle\\apple.csv', 'rb') as f:

      reader = csv.reader(f)

      apple = list_to_dict(list(reader)[0])

    with open('.\Script Bundle\\microsoft.csv', 'rb') as f:

      reader = csv.reader(f)

      microsoft = list_to_dict(list(reader)[0])

#    print('hello world' + ' '.join(apple[0]))

    applecount = 0

    microsoftcount = 0

    input = "i want to buy surface which runs on windows"

    splitted_input = input.split(' ')

    for word in splitted_input:

        if word in apple:

            applecount = applecount + 1

        if word in microsoft:

            microsoftcount = microsoftcount + 1

    print("apple bag of words count - " + str(applecount))

    print("microsoft bag of words count - " + str(microsoftcount))

    mydata = [{'input words': len(splitted_input)}, {'applecount':applecount},

        {'microsoftcount':microsoftcount}]       

    # Return value must be of a sequence of pandas.DataFrame

    return pd.DataFrame(mydata),

def list_to_dict(li):      

    dct = {}  

    for item in li:

        if dct.has_key(item):              

            dct[item] = dct[item] + 1  

        else:  

            dct[item] = 1  

    return dct  

...