"""
Generate a dash docset from a doxygen documentation.
"""
# NOTE: the original import block and several function names were lost in
# extraction. The imports below cover everything the script uses; function
# names not present in the source (create_skeleton, gather_html_files,
# find_repo, parse_file, copy_files) are reconstructed.
import json
import os
import re
import shutil
import sqlite3
from pathlib import Path

import bs4
CLASS_FILE_RE = re.compile(
    r'class([a-zA-Z_][a-zA-Z0-9_]*)_1_1([a-zA-Z_][a-zA-Z0-9_]*)\.html')
CLASS_RE = re.compile('fw4spl: (.+) Class Reference')
STRUCT_FILE_RE = re.compile(
    r'struct([a-zA-Z_][a-zA-Z0-9_]*)_1_1([a-zA-Z_][a-zA-Z0-9_]*)\.html')
STRUCT_RE = re.compile('fw4spl: (.+) Struct Reference')
NAMESPACE_FILE_RE = re.compile(r'namespace.+\.html')
NAMESPACE_RE = re.compile(
    'fw4spl: ([a-zA-Z_][a-zA-Z0-9_:]*) Namespace Reference')
SRV_RE = re.compile(
    'fw4spl: ([a-zA-Z_][a-zA-Z0-9_]*::(?:[a-zA-Z_][a-zA-Z0-9_]*::)*(S[A-Z0-9][a-zA-Z0-9_]*)) Class Reference')
BAD_SRV_RE = re.compile(
    'fw4spl: ([a-zA-Z_][a-zA-Z0-9_]*::(?:[a-zA-Z_][a-zA-Z0-9_]*::)*([A-Z0-9][a-zA-Z0-9_]*)) Class Reference')
OBJ_RE = re.compile(
    'fw4spl: ([a-zA-Z_][a-zA-Z0-9_]*::(?:[a-zA-Z_][a-zA-Z0-9_]*::)*([A-Z0-9][a-zA-Z0-9_]*)) Class Reference')
IFACE_RE = re.compile(
    'fw4spl: ([a-zA-Z_][a-zA-Z0-9_]*::(?:[a-zA-Z_][a-zA-Z0-9_]*::)*(I[A-Z0-9][a-zA-Z0-9_]*|IService)) Class Reference')
EXCEPT_RE = re.compile(
    'fw4spl: ([a-zA-Z_][a-zA-Z0-9_]*::(?:[a-zA-Z_][a-zA-Z0-9_]*::)*([A-Z0-9][a-zA-Z0-9_]*)) Struct Reference')
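
# Doxygen encodes the C++ scope separator '::' as '_1_1' in generated file
# names, so e.g. fwData::Object is documented in 'classfwData_1_1Object.html'.
# Illustrative check (not part of the original script):
#
#   >>> CLASS_FILE_RE.match('classfwData_1_1Object.html').groups()
#   ('fwData', 'Object')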

FILE_SKIP_RE = [
    re.compile('pages.html'),
    re.compile(r'dir_.+\.html'),
    re.compile('.+_source.html')
]


def create_skeleton():
    """
    Create the skeleton for the docset, i.e. the directory structure along
    with the SQLite database. Return the SQLite connection.
    """
    Path('./fw4spl.docset/Contents/Resources').mkdir(parents=True, exist_ok=True)
    db = Path('./fw4spl.docset/Contents/Resources/docSet.dsidx')
    conn_ = sqlite3.connect(str(db))
    cur = conn_.cursor()
    cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
    cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
    return conn_
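
# The resulting index can be inspected with the sqlite3 CLI (illustrative):
#
#   sqlite3 fw4spl.docset/Contents/Resources/docSet.dsidx \
#       'SELECT name, type, path FROM searchIndex;'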


def gather_html_files():
    """
    Return a list containing the paths to all interesting HTML files contained
    at the root of the Doxygen html directory. We're not interested in what's
    in the subdirectories.
    """
    files = []
    for _, _, dir_files in os.walk('./html/'):
        files += [f for f in dir_files if f.endswith('.html')]
        # Only the root of './html/' matters (see docstring), so stop after
        # the first directory yielded by os.walk.
        break
    return files


def parse_related_pages():
    """
    Parse the 'pages.html' doxygen file and generate the list of related pages.
    """
    pages = []
    html = open(os.path.join('./html', 'pages.html'), encoding="utf8").read()
    soup = bs4.BeautifulSoup(html, "html.parser")
    table = soup.find("table", class_="directory")
    for cell in table.find_all("tr"):
        page_name = cell.td.a.string
        page_link = cell.td.a.get('href')
        pages.append((page_name, "Guide", page_link))
    return pages


def find_repo(f_soup):
    """
    Return the name of the repository that a particular documentation file
    was generated from, or None if that is not possible.
    """
    # The documented file's path appears in the last <ul> element of the page.
    lists = f_soup.find_all('ul')
    file_path = lists[-1].li.get_text()
    candidates = [repo
                  for repo in CFG['repositories']
                  if file_path.startswith(repo)]
    if candidates:
        # The longest matching prefix is the most specific repository.
        res = max(candidates, key=len)
        return REPO_NAMES[res]
    return None
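
# Longest-prefix example with hypothetical configuration values: if
# CFG['repositories'] contains both '/src/fw4spl' and '/src/fw4spl-ar', a
# file under '/src/fw4spl-ar/' matches both prefixes and max(..., key=len)
# picks the more specific '/src/fw4spl-ar'.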


def parse_file(f_):
    """
    Parse a HTML file and return a (potentially empty) list of 3-tuples to add
    to the SQLite database.
    """
    new_entries = []
    if any(map(lambda regexp: regexp.match(f_), FILE_SKIP_RE)):
        return new_entries
    try:
        html = open(os.path.join('./html', f_), encoding="utf8").read()
        soup = bs4.BeautifulSoup(html, "html.parser")
        inherits_iservice = soup.find(
            class_='inherit_header pub_methods_classfwServices_1_1IService')
        inherits_object = soup.find(
            class_='inherit_header pub_methods_classfwData_1_1Object')
        inherits_exception = soup.find(
            class_='inherit_header pub_methods_classfwCore_1_1Exception')
        item_type_re = {
            "Service": SRV_RE,
            "Object": OBJ_RE,
            "Interface": IFACE_RE,
            "Exception": EXCEPT_RE
        }

        def is_item_type(soup, ty_str):
            """
            Test if the HTML contained in the supplied soup describes an
            element of the specified type based on the doxygen page title.
            Accepted types are 'Service', 'Object', 'Interface' and
            'Exception'. If true, return an entry to add to the sqlite DB,
            else return None.
            """
            title = soup.title.get_text()
            match = item_type_re[ty_str].search(title)
            if match is None:
                return None
            path = match.group(1)
            repo = find_repo(soup)
            if repo is not None:
                path = path + " ({})".format(repo)
            return (path, ty_str, f_)

        def is_bad_service(soup):
            """
            Test if the HTML contained in the supplied soup describes a
            service, with more lenient rules regarding the name of the
            service. If true, print a warning regarding the service name and
            return an entry to add to the sqlite DB, otherwise return None.
            """
            title = soup.title.get_text()
            match = BAD_SRV_RE.search(title)
            if match is None:
                return None
            path = match.group(1)
            srv = match.group(2)
            repo = find_repo(soup)
            if repo is not None:
                path = path + " ({})".format(repo)
            print("Warning: service {} has a non-compliant name (no S prefix)".format(srv))
            return (path, "Service", f_)
157 "Namespace": NAMESPACE_RE,
160 def is_file_type(soup, ty_str):
162 Test if the HTML contained in the supplied soup describes and element of the specified type based on the 163 doxygen page title. Accepted types are 'Class', 'Namespace', and 'Struct'. If true, return an 164 entry to add to the sqlite DB, else return None. 166 title = soup.title.get_text()
167 match = file_type_re[ty_str].search(title)
169 struct_ = match.group(1)
170 return (struct_, ty_str, f_)

        if CLASS_FILE_RE.match(f_):
            class_triple = is_file_type(soup, 'Class')
            if class_triple is None:
                return new_entries
            class_name = class_triple[0]
            if inherits_iservice:
                # Try the most specific title patterns first: interface, then
                # properly named service, then leniently named service.
                triple = is_item_type(soup, 'Interface')
                if triple is not None:
                    new_entries.append(triple)
                else:
                    triple = is_item_type(soup, 'Service')
                    if triple is not None:
                        new_entries.append(triple)
                    else:
                        triple = is_bad_service(soup)
                        if triple is not None:
                            new_entries.append(triple)
                        else:
                            print("Warning: unexpected behaviour for class {} while parsing file {}".format(class_name, f_))
            elif class_name == "fwData::Object":
                new_entries.append((class_name, "Class", f_))
            elif inherits_object:
                triple = is_item_type(soup, 'Object')
                if triple is not None:
                    new_entries.append(triple)
            elif class_name == "fwCore::Exception":
                new_entries.append((class_name, "Exception", f_))
            elif inherits_exception:
                triple = is_item_type(soup, 'Exception')
                if triple is not None:
                    new_entries.append(triple)
            else:
                new_entries.append(class_triple)
        elif STRUCT_FILE_RE.match(f_):
            struct_triple = is_file_type(soup, 'Struct')
            if struct_triple is None:
                return new_entries
            new_entries.append(struct_triple)
            if inherits_exception:
                triple = is_item_type(soup, 'Exception')
                if triple is not None:
                    new_entries.append(triple)
        elif NAMESPACE_FILE_RE.match(f_):
            namespace_triple = is_file_type(soup, 'Namespace')
            if namespace_triple is None:
                return new_entries
            namespace_name = namespace_triple[0]
            if namespace_name in CFG['srclibs']:
                new_entries.append((namespace_name, "Library", f_))
            elif namespace_name in CFG['bundles']:
                new_entries.append((namespace_name, "Package", f_))
            else:
                new_entries.append(namespace_triple)
    except UnicodeDecodeError:
        print('The file ' + f_ + ' is not valid UTF-8')
    except FileNotFoundError:
        # Nothing to index if the file does not exist.
        pass
    return new_entries
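
# Illustrative result: for the page documenting fwData::Object itself,
# parse_file('classfwData_1_1Object.html') yields
# [('fwData::Object', 'Class', 'classfwData_1_1Object.html')] via the
# class_name == "fwData::Object" branch above.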


def populate_db(conn_, services):
    """
    Fill the sqlite database with the supplied list of
    (name, entry_type, file_path) triples.
    """
    cur = conn_.cursor()
    for triple in services:
        try:
            cur.execute("INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);", triple)
        except sqlite3.Error:  # exception type assumed; the original clause was lost
            print("Error inserting " + str(triple))
    conn_.commit()  # persist the inserts before the connection is closed


def copy_files():
    """
    Copy the doxygen HTML files into the docset destination.
    """
    try:
        shutil.copytree('./html', './fw4spl.docset/Contents/Resources/Documents')
    except shutil.Error as err:
        # copytree reports its failures as a list of (src, dst, why) triples.
        errors = err.args[0]
        print("Warning: some files were not copied correctly. The generated docset might be incomplete.")
        for src, _, why in errors:
            print("File '" + src + "' was not copied correctly. Reason: " + why)


def main():
    """
    Build the dash docset.
    """
    global CFG, REPO_NAMES
    try:
        CFG = json.loads(open('./projects.json', encoding="utf8").read())
    except (OSError, json.JSONDecodeError) as err:
        print("Error loading configuration file: " + str(err))
        return
    # Map a repository path to a display name: for a path ending in 'src' use
    # the parent directory's name, otherwise the last path component.
    REPO_NAMES = {repo: Path(repo).parent.name if Path(repo).name == "src"
                  else Path(repo).name
                  for repo in CFG['repositories']}
    # The orchestration below is reconstructed from the helpers defined above.
    conn_ = create_skeleton()
    entries = parse_related_pages()
    for f_ in gather_html_files():
        entries += parse_file(f_)
    populate_db(conn_, entries)
    copy_files()
    conn_.close()


if __name__ == '__main__':
    main()