生成CSV用于测试:

 1# -*- coding: utf-8 -*-
 2import csv, threading
 3import random
 4
# Pool of sample URLs that random test rows are drawn from.
URLS = [
    'http://www.163.com/aaaa/bbb/ccc.jsp&path=1234',
    'http://wap.roboo.com/pages/c.jsp',
    'http://www.sina.com.cn/sjf/sfsfsdf/sfsfsafsda.aspx&sdf=sfsdf&sdfsdf=%&%^*',
    'http://sohu.cn/sjflsjlfjsdf/safsfd/safasfsaf/sf.jsp',
    # The trailing comma here matters: without it Python concatenates this
    # literal with the next one and the two URLs collapse into a single entry.
    'http://tianya.cn/sfsf/sf/sf/sf/s/f/sfs.aaa',
    'http://download.org.cn/s/fs/f/s/f/a.af',
    'http://www.roboo.com/sring.jsp/sjalfj.jsp&sdlfajlsf=1493932',
    'http://houhouhou/alsjdf/sa/f/sa/f.sf',
    'http://asfsf.cn/safs.asfdd',
    'http://哦哦/saf.aspx&234..23j4l2j34/safd.aspx',
    'http://www.263.net/sfsfds/sfsdf.jsp&12323&2123234&23424=23423',
]
17
# Pool of sample page titles that random test rows are drawn from.
# Note that one title contains a comma, so the CSV writer will quote it.
TITLES = [
    '爱莎莎莎',
    '什么和生命',
    '生活的意义',
    '新浪蓝景',
    '如薄啊,无敌铁',
    '神龙见附录三顿饭',
    '我就哦哦将为您',
    '宁波ihoiha',
    '阿加莎刻录机服了你',
    '宁波啊遏体哦你',
]
29
# Upper bound on the number of writer threads (spelling kept as-is because it
# is a module-level name).
MAX_TREADS = 10

# Number of TCSVGenerater objects that have been created and whose run() has
# not finished yet.  Defined here so the class works even when the script
# body has not run; a bare module-level ``global`` statement defines nothing.
currentThreads = 0

# Guards every read-modify-write of currentThreads.
lock = threading.RLock()


def _open_csv_for_write(filename, mode):
    """Open *filename* for csv.writer in the way the running Python expects.

    Python 3's csv module needs a text file opened with ``newline=''``;
    Python 2's needs a binary file.  UTF-8 is used on Python 3 to match the
    source file's declared encoding.
    """
    import sys

    if sys.version_info[0] >= 3:
        return open(filename, mode, newline='', encoding='utf-8')
    return open(filename, mode + 'b')


class TCSVGenerater(threading.Thread):
    """Thread that writes a sequence of rows to a CSV file.

    Creating an instance increments the module-level ``currentThreads``
    counter; the counter is decremented when ``run`` finishes, whether or not
    the write succeeded.
    """

    def __init__(self, data, filename, mode='a'):
        """Store the job and register it in ``currentThreads``.

        data     -- iterable of row sequences passed to ``writerows``.
        filename -- path of the CSV file to write.
        mode     -- file mode without the binary flag ('a' appends, 'w'
                    truncates).
        """
        global currentThreads
        threading.Thread.__init__(self)
        self._data = data
        self._filename = filename
        self._mode = mode
        with lock:
            currentThreads += 1

    def run(self):
        """Write all rows, close the file, then release the counter slot."""
        global currentThreads
        try:
            # The with-block guarantees the handle is flushed and closed.
            with _open_csv_for_write(self._filename, self._mode) as out:
                csv.writer(out).writerows(self._data)
        finally:
            # Always give the slot back, even if the write raised.
            with lock:
                currentThreads -= 1
53        
def make_random_rows(titles, urls, count):
    """Return *count* ``(title, url)`` tuples drawn at random from the pools.

    titles, urls -- non-empty sequences to pick from; ``random.choice``
                    raises IndexError on an empty sequence.
    count        -- number of rows to build.
    """
    return [(random.choice(titles), random.choice(urls)) for _ in range(count)]


if __name__ == '__main__':
    # The writer thread updates this counter, so it must exist first.
    currentThreads = 0
    out_path = 'd:/CSV/titleurl1.csv'

    for i in range(20):
        # Build a fresh batch each round.  Reusing one growing list would
        # re-append every earlier row to the file on each pass.
        rows = make_random_rows(TITLES, URLS, 10000)

        # Each writer is joined before the next one is created, so at most
        # one is ever alive and no thread-count throttle is needed; joining
        # also keeps appends to the shared file from interleaving.
        thread = TCSVGenerater(rows, out_path)
        thread.start()
        thread.join()

        # i is zero-based; report how many batches have been written so far.
        print(' %d 份随机10000数据已写入。' % (i + 1))


统计的代码:
 1import urlparse, csv
 2import threading
 3from operator import itemgetter
 4
# urlsplit lives in different modules on Python 2 and 3; resolve it here so
# the reader does not depend on the Python-2-only module name.
try:
    from urllib.parse import urlsplit  # Python 3
except ImportError:
    from urlparse import urlsplit  # Python 2

# Maps host name -> number of rows whose URL has that host.
DICTHOSTS = {}


def _open_csv_for_read(filename):
    """Open *filename* for csv.reader in the way the running Python expects.

    Python 3's csv module needs a text file opened with ``newline=''``;
    Python 2's needs a binary file.
    NOTE(review): UTF-8 is assumed on Python 3 -- confirm against whatever
    produces the input file.
    """
    import sys

    if sys.version_info[0] >= 3:
        return open(filename, 'r', newline='', encoding='utf-8')
    return open(filename, 'rb')


class TXTReader(threading.Thread):
    """Thread that counts URL hosts found in the second column of a CSV file.

    Counts are accumulated into the module-level ``DICTHOSTS`` dict.
    """

    def __init__(self, filename):
        """Remember the path of the CSV file to scan."""
        threading.Thread.__init__(self)
        self._file = filename

    def run(self):
        """Read every row and add one to the count of its URL's host.

        Rows with fewer than two fields, and URLs without a host part, are
        skipped.
        """
        # Opening inside ``with`` closes the file on every path and lets an
        # open() failure surface as itself.
        with _open_csv_for_read(self._file) as fhandle:
            # A real CSV parser is needed: a quoted field may contain commas,
            # which a plain split(',') would cut in the wrong place.
            for row in csv.reader(fhandle):
                if len(row) < 2:
                    continue
                host = urlsplit(row[1])[1]
                if host:
                    DICTHOSTS[host] = DICTHOSTS.get(host, 0) + 1
26
27
def write_ranked_hosts(counts, filename):
    """Write ``(host, count)`` rows to *filename*, highest count first.

    counts   -- dict mapping host name to its count.
    filename -- path of the CSV file to create or overwrite.

    Returns the sorted list of ``(host, count)`` pairs that was written.
    """
    import sys

    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)

    # Python 3's csv module wants a text file with newline=''; Python 2's
    # wants a binary file.
    if sys.version_info[0] >= 3:
        out = open(filename, 'w', newline='', encoding='utf-8')
    else:
        out = open(filename, 'wb')
    with out:
        csv.writer(out).writerows(ranked)
    return ranked


if __name__ == '__main__':
    reader = TXTReader('d:/CSV/titleurl1.csv')
    reader.start()
    reader.join()
    print(DICTHOSTS)

    # Path and mode are separate arguments; written as adjacent literals they
    # would merge into one bogus file name opened for reading.
    write_ranked_hosts(DICTHOSTS, 'd:/CSV/Result.csv')
39    
40