TypeError: 'NoneType' object is not callable - Beautifulsoup 4

Question

asked Jul 29, 2019 in Python by Rajesh Malhotra (19.9k points)

I'm trying to save the scraped data in a Postgres database, and I want to use Django models for this.

I tried the psycopg2 package first, but found out that it is unnecessary, so I decided to use only Django models. The data did not reach the database when I used psycopg2 either.

I get this error:

Traceback (most recent call last):
File "/home/xxxx/Desktop/project/django/tnapp/scrap.py", line 61, in <module>
scraped_author = author(name='author name')
TypeError: 'NoneType' object is not callable

Scraper:

import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from collections import Counter
import psycopg2
# from sqlalchemy.dialects.postgresql import psycopg2
# Scraper for the teonite.com blog: collects article links from the paginated
# index with requests, then opens each article in Chrome through Selenium and
# pulls the post body and the author element out of the rendered page.
# NOTE(review): indentation was lost when this snippet was pasted. The nesting
# of the with/for/try bodies cannot be recovered with certainty, so every
# statement is left exactly as posted.
# URL template used for index pages 2..num_pages; the first page is fetched
# from /blog/ directly.
url = 'https://teonite.com/blog/page/{}/index.html'
# Starts as a list of per-page link lists; flattened once pagination is done.
all_links = []
# NOTE(review): headers is defined but never passed to a request in this snippet.
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'User-Agent': 'Mozilla/5.0'
}
with requests.Session() as s:
r = s.get('https://teonite.com/blog/')
soup = bs(r.content, 'lxml')
# The first two characters of every href are dropped before the site root is
# prepended (presumably a leading ".." - TODO confirm against the page markup).
article_links = ['https://teonite.com' + item['href'][2:] for item in soup.select('.post-content a')]
all_links.append(article_links)
# The pager text is split on '/' and the part after the slash is read as the
# total number of index pages.
num_pages = int(soup.select_one('.page-number').text.split('/')[1])
for page in range(2, num_pages + 1):
r = s.get(url.format(page))
soup = bs(r.content, 'lxml')
article_links = ['https://teonite.com' + item['href'][2:] for item in soup.select('.post-content a')]
all_links.append(article_links)
# Flatten the per-page lists into one list of article URLs.
all_links = [item for i in all_links for item in i]
d = webdriver.Chrome(ChromeDriverManager().install())
contents = []
authors = []
for article in all_links:
d.get(article)
soup = bs(d.page_source, 'lxml')
# Detach the listed tags from the parsed tree; the list comprehension is used
# only for its side effect and its result is discarded.
[t.extract() for t in soup(['style', 'script', '[document]', 'head', 'title'])]
# NOTE(review): visible_text is assigned but not read again in this snippet.
visible_text = soup.getText()
content = soup.find('section', attrs={'class': 'post-content'})
contents.append(content)
# `author` is bound here to the result of soup.find(), which is None when no
# matching <span> exists on the page.
author = soup.find('span', attrs={'class': 'author-content'})
authors.append(author)
# NOTE(review): unique_authors / unique_contents are not read again in this snippet.
unique_authors = list(set(authors))
unique_contents = list(set(contents))
try:
print(soup.select_one('.post-title').text)
except:
# Reached on any exception from the print above (the except is bare), e.g.
# when select_one() finds no .post-title element and returns None.
print(article)
print(soup.select_one('h1').text)
break # for debugging
d.quit()
# `author` at this point is whatever the last soup.find() above returned; no
# model class is imported in this snippet. When that value is None, calling it
# raises "TypeError: 'NoneType' object is not callable".
# NOTE(review): the author model shown in this file declares author_id and
# author_name; it has no field called `name`.
scraped_author = author(name='author name')
# NOTE(review): save() is called on `author`, not on the `scraped_author`
# object created on the previous line.
author.save()

Models:

from django.db import models
class author(models.Model):
    """Blog author record stored in the ``author`` table.

    The class name is left lowercase so existing references keep working.
    PEP 8 recommends CapWords for class names (``Author``), which would also
    keep the model from colliding with local variables called ``author``.
    """

    # String primary key supplied by the caller (not auto-generated).
    # editable=False keeps it out of the admin and of ModelForms.
    author_id = models.CharField(primary_key=True, max_length=50, editable=False)
    author_name = models.CharField(max_length=50)

    class Meta:
        # Default queryset order: author_id descending.
        ordering = ['-author_id']
        db_table = 'author'
class stats(models.Model):
    """Row of the ``stats`` table: a ``content`` string with an integer ``stats`` value.

    The class name is left lowercase so existing references keep working;
    PEP 8 recommends CapWords for class names.
    """

    content = models.CharField(max_length=50)
    # NOTE(review): presumably a count associated with `content` - confirm
    # against the code that populates this table.
    stats = models.IntegerField()

    class Meta:
        # Default queryset order: highest stats value first.
        ordering = ['-stats']
        db_table = 'stats'
class authorStats(models.Model):
    """Row of the ``author_stats`` table: an author id, a ``content`` string and an integer ``stats`` value.

    The class name is left unchanged so existing references keep working;
    PEP 8 recommends CapWords for class names (``AuthorStats``).
    """

    # Plain string column, not a ForeignKey, so the database does not enforce
    # that the value matches a row in the author table.
    # NOTE(review): max_length is 100 here but 50 on author.author_id - confirm
    # which limit is intended.
    author_id = models.CharField(max_length=100)
    content = models.CharField(max_length=100)
    stats = models.IntegerField()

    class Meta:
        # Default queryset order: stats ascending (the stats model uses descending).
        ordering = ['stats']
        db_table = 'author_stats'

1 Answer

Anirudh Singh · Answer 1 · 2019-07-29T05:33:05+0000

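You are getting this error because two things in your script are named author: your model and the value you find in the scraped content. The scraped value is assigned last, so it shadows the model, and it is None whenever the element is not found on the page; author(...) therefore ends up calling None. To resolve the issue, follow Python's naming conventions and rename the model to Author. Class names should always start with a capital letter.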

TypeError: 'NoneType' object is not callable - Beautifulsoup 4

1 Answer

Related questions

Browse By Domains

Popular Courses

Popular Tutorials

Popular Resources