Back

Explore Courses Blog Tutorials Interview Questions
0 votes
2 views
in AI and Deep Learning by (680 points)
I want to build a small AI program using Python. How can I do that?

1 Answer

0 votes
by (7.2k points)

import requests 

import random 

import nltk 

from nltk.corpus import stopwords,wordnet 

import re 

# Stop-word set used by the feature extractor: NLTK's English list plus a few
# extra tokens that were deliberately excluded from the features after
# inspecting the data.
english_stops = set(stopwords.words('english'))
english_stops.update(('.', 'also', 'get'))

 

def feature_extractor(document):
    """Build a bag-of-words feature dict from a tokenized document.

    Args:
        document: Iterable of sentences, each an iterable of token strings
            (the shape returned by ``pre_train_processor``).

    Returns:
        dict mapping ``'contains(<word>) with identity known'`` to ``True``
        for every kept token. A token is kept when it is alphabetic and not
        in ``english_stops`` (or is the word "not"), and WordNet has at
        least one synset for its lowercase form.
    """
    features = {}
    for document_words in document:
        for word in document_words:
            lowered = word.lower()
            # "not" is forced through even if the stop-word filter would
            # drop it.
            keep = ((lowered not in english_stops and word.isalpha())
                    or lowered == 'not')
            if not keep:
                continue
            # Words unknown to WordNet are skipped. An emptiness check is
            # used here instead of the former identity test against 0.
            if not wordnet.synsets(lowered):
                continue
            features['contains(%s) with identity %s' % (lowered, 'known')] = True
    return features

 

def pre_train_processor(sent):
    """Clean a raw review string and split it into tokenized sentences.

    Cleaning steps, in order:
      1. remove tokens starting with ``@`` and tokens starting with ``http``
         (each runs up to the next space);
      2. collapse every run of two or more dots into a single ``.``;
      3. squeeze any word character repeated three or more times in a row
         down to two occurrences (``"sooooo"`` -> ``"soo"``).

    Args:
        sent: Raw text to process.

    Returns:
        list of sentences, each a list of tokens, as produced by
        ``nltk.sent_tokenize`` followed by ``nltk.word_tokenize``.
    """
    text = re.sub(r'(^@| @)[^ ]+', r"", sent)
    text = re.sub(r'(^http| http)[^ ]+', r"", text)
    text = re.sub(r'\.{2,}', r".", text)
    # Squeeze repeated characters in one pass. The previous approach first
    # searched for candidate characters with a greedy, non-overlapping
    # pattern, which skipped any character whose run sat inside another
    # character's match (e.g. "waaay" in "coool waaay ooo").
    text = re.sub(r'(\w)\1{2,}', r'\1\1', text)
    return [nltk.word_tokenize(sentence)
            for sentence in nltk.sent_tokenize(text)]

 

def _rating_tag(rating):
    """Map a review rating to its training label."""
    if rating > 3:
        return 'good'
    if rating == 3:
        return 'moderate'
    return 'bad'


def train_classifier(app_id='', app_key='',
                     city_id='6771549831164675055', limit=50):
    """Download hotel reviews and train a Naive Bayes sentiment classifier.

    Fetches the hotels of one city from the Goibibo API, then the reviews of
    each hotel, labels every review from its rating (> 3 'good', == 3
    'moderate', otherwise 'bad'), trains on the first 80% of the feature
    sets and prints the accuracy measured on the remaining 20%.

    Args:
        app_id: Goibibo API application id, sent as ``app_id``.
        app_key: Goibibo API application key, sent as ``app_key``.
        city_id: City whose hotels are fetched, sent as ``city_id``.
        limit: Value sent as the ``limit`` query parameter of each
            per-hotel review request.

    Returns:
        The trained ``nltk.NaiveBayesClassifier``.

    NOTE(review): the rating comparison assumes ``totalRating`` is numeric
    in the API response -- confirm against a real payload.
    """
    credentials = {'app_id': app_id, 'app_key': app_key}

    hotels_url = ('http://developer.goibibo.com/api/voyager/'
                  'get_hotels_by_cityid/')
    res = requests.get(hotels_url, params=dict(credentials, city_id=city_id))
    hotels = res.json()['data']
    print(len(hotels))

    reviews_url = 'http://ugc.goibibo.com/api/HotelReviews/forWeb'
    final_resp = []
    for count, hotel in enumerate(hotels.values(), 1):
        print(count)  # progress indicator: number of hotels processed
        vid = hotel['hotel_geo_node']['_id']
        query = dict(credentials, vid=vid, limit=limit, offset=0)
        reslt = requests.get(reviews_url, params=query).json()
        try:
            for review in reslt:
                content = review.get('reviewContent')
                # Reviews of 10 characters or fewer are ignored.
                if content and len(content.strip()) > 10:
                    final_resp.append({'rating': review['totalRating'],
                                       'text': content})
        except Exception as e:
            # Best effort: a malformed response for one hotel is reported
            # and the remaining hotels are still processed.
            print(str(e))

    featuresets = [
        (feature_extractor(pre_train_processor(x['text'])),
         _rating_tag(x['rating']))
        for x in final_resp
    ]
    print("lenght of data set: " + str(len(featuresets)))

    # First 80% of the collected reviews train the model; the rest test it.
    feat_len = int(0.8 * len(featuresets))
    train_set, test_set = featuresets[:feat_len], featuresets[feat_len:]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    print(nltk.classify.accuracy(classifier, test_set))
    return classifier

 

def check_pos_hotels(classifier, data):
    """Classify a piece of review text with a trained classifier.

    Args:
        classifier: Object exposing ``classify(features)``, such as the one
            returned by ``train_classifier``.
        data: Raw review text.

    Returns:
        dict with the extracted feature dict under ``'features'`` and the
        classifier's label for it under ``'tag'``.
    """
    extracted = feature_extractor(pre_train_processor(data))
    return {'features': extracted, 'tag': classifier.classify(extracted)}

Browse Categories

...