"""Count how often each host appears in the URL column of a set of CSV files.

Each input file is scanned by a ``CSVReader`` thread that adds to the shared
``DICTHOSTS`` tally; the script entry point then writes the tally, most
frequent host first, to ``Result.csv``.
"""
import csv
import sys
import threading
from operator import itemgetter

try:  # Python 2
    import urlparse
except ImportError:  # Python 3 ships the same API as urllib.parse
    import urllib.parse as urlparse

# Shared tally: host name -> number of lines whose URL field had that host.
DICTHOSTS = {}


class CSVReader(threading.Thread):
    """Thread that tallies the URL hosts found in one file into DICTHOSTS.

    Args:
        threadname: Name given to the thread; echoed in the progress message.
        filename: Path of the file to scan.
    """

    def __init__(self, threadname, filename):
        threading.Thread.__init__(self, name=threadname)
        self._file = filename

    def run(self):
        """Scan the file line by line and count each non-empty URL host.

        The URL is taken to be the second field when the line is split on
        the literal ``","`` separator. Lines without that separator, lines
        whose URL has no host part, and lines whose URL cannot be parsed are
        skipped. A progress message is printed once the file is exhausted.

        Raises:
            IOError/OSError: If the file cannot be opened.
        """
        # `with` opens the file before any cleanup is registered, so a failed
        # open surfaces as the real I/O error, and the handle is always closed.
        with open(self._file, 'r') as fhandle:
            for line in fhandle:
                fields = line.split('","')
                if len(fields) < 2:
                    # No URL field on this line. Skip it outright so a host
                    # left over from a previous line is never counted twice.
                    continue
                try:
                    host = urlparse.urlsplit(fields[1])[1]
                except ValueError:
                    # urlsplit rejects malformed bracketed hosts; one bad
                    # line should not abort the rest of the file.
                    continue
                if host:
                    DICTHOSTS[host] = DICTHOSTS.get(host, 0) + 1

        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print('%s %s has been traversed. DICTHOSTS has %d items.'
              % (self.name, self._file, len(DICTHOSTS)))


def _open_csv_for_writing(filename):
    """Open *filename* in the mode the csv module expects on this Python."""
    if sys.version_info[0] >= 3:
        # Python 3's csv writer wants a text file with newline translation off.
        return open(filename, 'w', newline='')
    return open(filename, 'wb')


if __name__ == '__main__':
    path = 'E:/workspace/URLsSorter/src/titleurl/'
    for i in range(1, 21):
        si = str(i)
        reader = CSVReader('Thread' + si, path + 'titleurl' + si + '.csv')
        reader.start()
        # Joining before starting the next reader keeps the files processed
        # one at a time, so DICTHOSTS is never mutated by two threads at once.
        reader.join()

    # Most frequent hosts first.
    sortedList = sorted(DICTHOSTS.items(), key=itemgetter(1), reverse=True)

    # Close the result file explicitly so every row is flushed to disk.
    with _open_csv_for_writing(path + 'Result.csv') as result_file:
        csv.writer(result_file).writerows(sortedList)
|
|
|
| 日 | 一 | 二 | 三 | 四 | 五 | 六 |
---|
24 | 25 | 26 | 27 | 28 | 29 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 1 | 2 | 3 | 4 | 5 |
|
导航
统计
- 随笔: 42
- 文章: 0
- 评论: 12
- 引用: 0
常用链接
留言簿(1)
随笔分类(32)
随笔档案(42)
文章分类
最新随笔
最新评论
阅读排行榜
评论排行榜
|
|