#!/usr/bin/env python
#
# UsernameGen
# Copyright (C) 2009 Jason Wood (JW Network Consulting)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

__version__ = '2.1.1'
__author__ = 'Jason Wood - aka Tadaka'
__doc__ = """
Originally google.py
Google (GNUCITIZEN) http://www.gnucitizen.org
by Petko D. Petkov; pdp (arhictect)
for Python 2.5

Name changed to usernameGen.py and modified by Jason Wood
http://www.jwnetworkconsulting.com

Changes:
6/28/2009
- Added "paddbottom" tag fix by Robin Wood for a new tag to match on
- Added fix for the regex by Robin Wood
- Added scheme for lastname + first initial usernames
6/16/2009
- Added handling for when there are middle names. creates usernames in
  the format of first initial, middle initial, last name
6/12/2009
- URL was changed to use mobile search
- Regex was tweaked so it would work again
- Limited the response to only the title of each result.
- Added a new class and method which:
  - takes the results of the title for the result and extracts the persons
    first and last name
  - then makes usernames with the first initial & last name and
    first name & last initial
"""

import logging
import re
import urllib

try:
    # Python 2 layout.
    import urllib2
    _urlencode = urllib.urlencode
except ImportError:
    # Python 3 moved Request/urlopen to urllib.request and urlencode to
    # urllib.parse; alias them so the classes below need no version checks.
    import urllib.parse
    import urllib.request as urllib2
    _urlencode = urllib.parse.urlencode


class Get(object):
    """
    Get
    The power of python in a single object

    Thin HTTP helper: opens a URL with a browser-like User-Agent header.
    """

    def __init__(self):
        # Header *value* only.  get() passes the header name separately, so
        # including a "User-Agent: " prefix here would send
        # "User-Agent: User-Agent: Mozilla/5.0 ..." on the wire.
        self.user_agent = (
            'Mozilla/5.0 '
            '(Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.11) '
            'Gecko/2009060214 Firefox/3.0.11'
        )

    def get(self, url):
        """
        get(url) -> Response

        Open given url and return the response object from urlopen().
        The caller is responsible for closing it.

        Raises RuntimeError('unable to open url') when the request cannot
        be built or opened.
        """
        try:
            request = urllib2.Request(url)
            request.add_header('User-Agent', self.user_agent)
            logging.debug('Get.get - getting url ' + url)
            result = urllib2.urlopen(request)
        except Exception:
            # Deliberately not a bare "except:": SystemExit (raised by the
            # script's SIGINT handler) and KeyboardInterrupt must propagate
            # instead of being reported as a failed download.
            logging.debug('Get.get - request failed', exc_info=True)
            raise RuntimeError('unable to open url')

        return result


class Search(Get):
    """
    Search
    The power of Google in a single object
    """

    def search(self, q, start=0, num=10):
        """
        search(q, start = 0, num = 10) -> list

        Do google (mobile) web search for q and return the list of values
        extracted from the response, one per regex match.

        start and num are passed through as the 'start' and 'num' query
        parameters.  Raises RuntimeError if the page cannot be opened.
        """
        url = 'http://www.google.com/m/search?'
        query = _urlencode({'q': q, 'start': start, 'num': num})

        result = self.get(url + query)
        try:
            content = result.read()
        finally:
            # Release the connection even if read() fails.
            result.close()

        # On Python 3 read() returns bytes, which a str pattern cannot
        # search; on Python 2 it is already str and is left untouched.
        if not isinstance(content, str):
            content = content.decode('utf-8', 'replace')

        # NOTE(review): this pattern looks like it has lost its HTML markup
        # somewhere along the way; as written it has a single group, while
        # token[2] below expects every match to have at least three
        # elements.  Restore the pattern from the upstream file before
        # relying on this method.
        tokens = re.findall(r'\s*(.*?)', content)

        results = []
        for token in tokens:
            title = token[2]
            logging.debug('Search.search - found title ' + title)
            results.append(title)

        return results


class Crawl(Search):
    """
    Crawl
    The power of Google in a single object
    """

    def crawl(self, q, depth=0, max_retries=3):
        """
        crawl(q, depth = 0, max_retries = 3) -> generator

        Do google web crawl: yield one list of results per result page,
        requesting pages of 10 results at offsets 0, 10, 20, ...

        depth        number of pages to yield at most; 0 (the default)
                     never matches a page index, so crawling continues
                     until the results run out.
        max_retries  how many times in a row a failing page is retried
                     before the crawl stops.

        The crawl also stops on an empty page or when a page is identical
        to the previous one.
        """
        index = 1
        failures = 0
        last_results = None

        while True:
            start = (index - 1) * 10

            try:
                results = self.search(q, start, 10)
            except Exception:
                # Best-effort retry, but bounded: retrying forever turns a
                # persistent failure into an endless request loop.
                failures += 1
                if failures > max_retries:
                    logging.warning(
                        'Crawl.crawl - giving up on page %d after %d '
                        'failed attempts', index, failures, exc_info=True)
                    break
                continue

            failures = 0

            if not results or results == last_results:
                break

            last_results = results
            yield results

            if index == depth:
                break

            index += 1


class MungeUsernames(Crawl):
    """
    Create usernames out of the search results
    """

    def usernames(self, searchterms):
        """
        usernames(searchterms) -> list

        Build candidate usernames from one result title of the form
        "<name> - LinkedIn" or "<name>: Directory - LinkedIn" (matching is
        case-insensitive; output is lower case).

        Two-word names give first initial + last name, first name + last
        initial and last name + first initial; three-word names add
        first initial + middle initial + last name.  Titles that do not
        match, and names with any other number of words, give [].
        """
        # set everything to lower case to start
        names_raw = searchterms.lower()

        # remove " - linkedin" from the title, then ": directory" if present
        matcher = re.match(r'(.*) - linkedin', names_raw, re.M | re.I)
        if not matcher:
            return []
        name = matcher.group(1)
        match_directory = re.match(r'(.*): directory', name, re.M | re.I)
        if match_directory:
            name = match_directory.group(1)

        # split() rather than split(" "): repeated or stray spaces would
        # otherwise produce empty parts and an IndexError on part[0].
        parts = name.split()
        if len(parts) == 2:
            fname, lname = parts
            mname = None
        elif len(parts) == 3:
            fname, mname, lname = parts
        else:
            return []

        candidates = [
            fname[0] + lname,
            fname + lname[0],
            lname + fname[0],
        ]
        if mname is not None:
            candidates.append(fname[0] + mname[0] + lname)
        return candidates

    def mungeusers(self, searchterms):
        """
        mungeusers(searchterms)

        Print, one per line, every username that usernames() derives from
        the given result title.  Prints nothing for unusable titles.
        """
        for uname in self.usernames(searchterms):
            print(uname)


if __name__ == '__main__':
    import os
    import sys
    import time
    import signal

    # Exit quietly on Ctrl-C instead of dumping a traceback.
    signal.signal(signal.SIGINT, lambda signum, frame: sys.exit())

    usage = 'usage: %s query #-of-results-pages' % os.path.basename(sys.argv[0])

    if len(sys.argv) < 3:
        print(usage)
        sys.exit()

    # The last argument is the page count; everything before it is the
    # query.  The count must not be joined into the search terms.
    try:
        pages = int(sys.argv[-1])
    except ValueError:
        print(usage)
        sys.exit(1)

    logging.basicConfig()

    munger = MungeUsernames()
    search_term = 'site:linkedin.com ' + ' '.join(sys.argv[1:-1])

    for result in munger.crawl(search_term, pages):
        for entry in result:
            munger.mungeusers(entry)
        # Pause one second before requesting the next page.
        time.sleep(1)