Browse code
Adding a distinctive user agent
fiddlerwoaroof authored on 21/10/2015 23:47:23
Showing 1 changed file
Showing 1 changed file
... | ... |
@@ -13,11 +13,12 @@ def titlecase(line): |
13 | 13 |
|
14 | 14 |
class DefaultTitleGetter(object): |
15 | 15 |
url_cleaner = re.compile('[+\-_]') |
16 |
+ user_agent = {'User-Agent': 'Marrow Title Getter: https://joinmarrow.com'} |
|
16 | 17 |
|
17 | 18 |
def get_title(self, url): |
18 | 19 |
s = requests.session() |
19 | 20 |
scheme, netloc, path, params, query, fragment = urlparse.urlparse(url, 'http') |
20 |
- data = s.get(url) |
|
21 |
+ data = s.get(url, headers=self.user_agent) |
|
21 | 22 |
etree = lxml.html.fromstring(data.content.decode(data.encoding)) |
22 | 23 |
|
23 | 24 |
canonicalLink = etree.xpath('//link[@rel="canonical"]/@href') |