pastebin - collaborative debugging tool
fferen.kpaste.net RSS


textbook.com code
Posted by Anonymous on Fri 7th Jan 2011 00:52
raw | new post

  1. import random
  2. import re
  3. import urllib2
  4.  
  5. PAGE_REGEX = re.compile('([0-9, ]+) p(?:\.|(?:ages))')
  6.  
  7. def getBetween(searched, start, end, startI=0):
  8.     sInd = searched.find(start, startI)
  9.     if sInd == -1:
  10.         return '', -1
  11.     sInd += len(start)
  12.     eInd = searched.find(end, sInd)
  13.     if eInd == -1:
  14.         return '', -1
  15.     return searched[sInd:eInd], eInd + len(end)
  16.  
  17. def getIsbns(url):
  18.     isbns = []
  19.     done = False
  20.     for pg in xrange(1, 5000):
  21.         print 'start page %d' % pg
  22.         html = urllib2.urlopen(url + '?PG=%d' % pg).read()
  23.         newI = 0
  24.         first = True
  25.         while True:
  26.             foo, newI = getBetween(html, "<span class='regGray11px'>(", '</span>', newI)
  27.             if not foo:
  28.                 if first:
  29.                     done = True
  30.                 break
  31.             first = False
  32.             isbn = getBetween(foo, 'ISBN10: ', ';')[0]
  33.             if isbn and 'Hardback' in foo:
  34.                 isbns.append(isbn)
  35.         if done:
  36.             break
  37.     return isbns
  38.  
  39. def getNumPages(isbn):
  40.     """return None if not found"""
  41.     loc = urllib2.urlopen('http://isbndb.com/search-all.html?kw=' + isbn).geturl()
  42.     match = re.search(PAGE_REGEX, urllib2.urlopen(loc).read())
  43.     if match:
  44.         print match.group(1)
  45.         return max(int(p) for p in match.group(1).replace(',', '').split())
  46.     return None
  47.  
  48. isbns = list(set(getIsbns('http://www.textbooks.com/Catalog/KME/Calculus-for-K-12.php')))
  49. random.shuffle(isbns)
  50. print len(isbns)
  51.  
  52. nums = []
  53. for isbn in isbns:
  54.     num = getNumPages(isbn)
  55.     if num:    
  56.         nums.append(num)
  57.         print isbn, nums

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for that content.
Abuse reports should be emailed to us at