forked from billryan/algorithm-exercise
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_source.py
More file actions
executable file
·127 lines (107 loc) · 3.97 KB
/
parse_source.py
File metadata and controls
executable file
·127 lines (107 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
import os
import sys
from pyquery import PyQuery as pq
import html2text
class Lintcode(object):
    """Scraper for problem pages on lintcode.com."""

    def __init__(self):
        # Base URL of the LintCode problem listing; not used elsewhere in
        # this class.
        self.url_algo = "http://www.lintcode.com/en/problem/"

    def get_src_page(self, src_url):
        """Load ``src_url`` with PyQuery and return the parsed document."""
        return pq(url=src_url)

    @staticmethod
    def _slice_body(raw_detail):
        """Return the statement part of ``raw_detail`` with ``<b>`` as ``<h4>``.

        The statement runs from the first ``<p>`` up to (not including) the
        ``<b>Tags</b>`` marker.  ``str.find`` returns -1 when a marker is
        absent; using that directly as a slice bound would silently drop or
        keep the wrong characters, so a missing marker falls back to the
        corresponding end of the string instead.
        """
        if not raw_detail:
            # ``.html()`` gives None when the selector matched nothing.
            return ''
        body_start = raw_detail.find('<p>')
        if body_start == -1:
            body_start = 0
        body_end = raw_detail.find('<b>Tags</b>')
        if body_end == -1:
            body_end = len(raw_detail)
        raw_body = raw_detail[body_start:body_end]
        # Bold section labels are promoted to level-4 headings.
        return raw_body.replace('<b>', '<h4>').replace('</b>', '</h4>')

    def get_src_detail(self, src_url):
        """Extract the details of the problem page at ``src_url``.

        Returns a dict with the keys:

        * ``'title'`` -- text of ``h4 .m-l-sm`` inside ``#problem-detail``
        * ``'tags'``  -- text of every ``a`` element under ``#tags``
        * ``'level'`` -- text of ``h4 .label`` inside ``#problem-detail``
        * ``'body'``  -- HTML of the statement (see ``_slice_body``); an
          empty string when the detail section is missing
        """
        src_page = self.get_src_page(src_url)
        problem_detail = src_page('#problem-detail')
        heading = problem_detail('h4')
        # Iterating the selection yields elements whose ``.text`` attribute
        # holds the tag name.
        tags = [tag.text for tag in problem_detail('#tags')('a')]
        return {
            'title': heading('.m-l-sm').text(),
            'tags': tags,
            'level': heading('.label').text(),
            'body': self._slice_body(problem_detail('div').html()),
        }
class Leetcode(object):
    """Scraper for problem pages on leetcode.com."""

    def __init__(self):
        # URL of the LeetCode algorithms problem set; not used elsewhere in
        # this class.
        self.url_algo = "https://leetcode.com/problemset/algorithms/"

    def get_src_page(self, src_url):
        """Load ``src_url`` with PyQuery and return the parsed document."""
        return pq(url=src_url)

    @staticmethod
    def _slice_body(raw_detail):
        """Return the statement part of ``raw_detail`` with ``<b>`` as ``<h4>``.

        The statement runs from the first ``<p>`` up to (not including) the
        first ``<div>``.  ``str.find`` returns -1 when a marker is absent;
        using that directly as a slice bound would silently cut the wrong
        characters, so a missing marker falls back to the corresponding end
        of the string instead.
        """
        if not raw_detail:
            # ``.html()`` gives None when the selector matched nothing.
            return ''
        body_start = raw_detail.find('<p>')
        if body_start == -1:
            body_start = 0
        body_end = raw_detail.find('<div>')
        if body_end == -1:
            body_end = len(raw_detail)
        raw_body = raw_detail[body_start:body_end]
        # Bold section labels are promoted to level-4 headings.
        return raw_body.replace('<b>', '<h4>').replace('</b>', '</h4>')

    def get_src_detail(self, src_url):
        """Extract the problem statement from the page at ``src_url``.

        Returns a dict with the single key ``'body'``: the HTML taken from
        the ``.question-content`` element (see ``_slice_body``), or an empty
        string when that element is missing.
        """
        src_page = self.get_src_page(src_url)
        raw_detail = src_page('.question-content').html()
        return {'body': self._slice_body(raw_detail)}

    # The three helpers below take the parsed page rather than ``self``.
    # They are declared static so they can be called both as
    # ``Leetcode.helper(page)`` and on an instance.

    @staticmethod
    def get_src_tags(src_page):
        """Return the text of every ``.btn.btn-xs.btn-primary`` element."""
        raw_tags = src_page('.btn.btn-xs.btn-primary')
        return [tag.text() for tag in raw_tags.items()]

    @staticmethod
    def get_src_title(src_page):
        """Return the ``<title>`` text that precedes the first ``|``.

        Surrounding whitespace is stripped; when the title has no ``|`` the
        whole title is returned.
        """
        return src_page('title').text().split('|')[0].strip()

    @staticmethod
    def get_difficulty(src_page):
        """Placeholder: difficulty lookup is not implemented; returns None."""
        # TODO: the problem-set listing below is presumably where the
        # difficulty would be looked up -- confirm before implementing.
        search_url = "https://leetcode.com/problemset/algorithms/"
        return None
class Hihocoder(object):
    """Scraper for hihocoder.com problem pages."""

    def __init__(self):
        # URL of a hihocoder contest problem list; not used elsewhere in
        # this class.
        self.url_algo = "http://hihocoder.com/contest/mstest2015april/problems"

    def get_src_page(self, src_url):
        """Parse ``src_url`` with PyQuery and return the document.

        NOTE(review): the value is passed as PyQuery's ``filename``
        argument, so it is presumably expected to be a local file path
        rather than a URL -- confirm against the caller.
        """
        return pq(filename=src_url)

    def get_src_title(self, src_page):
        """Return the problem title from the ``h3.panel-title`` element.

        The title is the text after the first ``': '``.  When that
        separator is absent the full heading text is returned unchanged
        (a bare ``find() + 2`` would drop its first character instead).
        """
        raw_title = src_page('h3.panel-title').text()
        _, separator, title = raw_title.partition(': ')
        return title if separator else raw_title

    def get_src_detail(self, src_url):
        """Extract the title and body of the problem at ``src_url``.

        Returns a dict with the keys ``'title'`` (see ``get_src_title``)
        and ``'body'`` (inner HTML of ``#tl-problem-content``, returned as
        given by PyQuery without further processing).
        """
        src_page = self.get_src_page(src_url)
        return {
            'title': self.get_src_title(src_page),
            'body': src_page('#tl-problem-content').html(),
        }
def main(argv):
    """Print a Markdown rendering of the problem found at ``argv[1]``.

    ``argv`` is the full argument vector (program name followed by exactly
    one problem URL).  With any other number of arguments the usage line is
    printed and the process exits with status 1, instead of falling through
    to a ``ValueError`` from unpacking ``argv``.

    URLs starting with the LintCode or LeetCode site prefix are handled by
    the matching scraper; everything else is handed to ``Hihocoder`` (a
    temporary catch-all in the original code).
    """
    if len(argv) != 2:
        print("Usage: python parse_source.py problem_url")
        sys.exit(1)
    url = argv[1]

    lintcode_url = 'http://www.lintcode.com'
    leetcode_url = 'https://leetcode.com'
    h = html2text.HTML2Text()

    if url.startswith(lintcode_url):
        src_body = Lintcode().get_src_detail(url)['body']
        print("### Problem Statement")
        print("")
        print(h.handle(src_body))
    elif url.startswith(leetcode_url):
        src_body = Leetcode().get_src_detail(url)['body']
        print("### Problem Statement")
        print("")
        print(h.handle(src_body))
    else:
        # temp: any other source is treated as a hihocoder page.
        # Load the page once and reuse the result for title and body.
        src_detail = Hihocoder().get_src_detail(url)
        print("# " + src_detail['title'])
        print("")
        print("## Question")
        print("")
        print("### Problem Statement")
        print("")
        print(h.handle(src_detail['body']))


if __name__ == "__main__":
    main(sys.argv)