Index.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. <?php
  /**
   * Class with all functionality needed to create the index for the
   * php-gtk2-doc search class.
   *
   * Index explanation:
   * - Only full words are found. If you search for "wind", "window" will *not* be found.
   * - Names are split at "_". Every single word is indexed, and for names with
   *   more than two words every pair of directly neighbouring words as well:
   *   searching for "this_is" finds the file "this_is_a_long_name", but searching
   *   for "this_is_a" does not, as only 2 directly connected words are indexed.
   *   Full names are indexed, too: "this_is_a_long_name" is found by searching
   *   for "this_is_a_long_name".
   * - Titles without spaces are split at upper-case letters ("GtkTreeView" gives
   *   "gtk", "tree", "view") and every run of neighbouring parts ("gtktree",
   *   "treeview", ...) is indexed for the matching file name part.
   * - The index is prioritized. This means that results are sorted by priority:
   *   (Example search: "window")
   *   - class names will be first
   *     found: gtk.gtkwindow.php, gdk.gdkwindow.php
   *   - methods which contain the search word in the method name are second
   *     found: gdk.gdkdragcontext.property.dest_window.php
   *   - methods with the search word not in the direct method name are third
   *     found: gdk.gdkwindow.method.lower.php
   *   NOTE(review): the builder in this class only writes priority levels 1 and 2;
   *   the third level is presumably derived by the search class - confirm there.
   *
   * @author Christian Weiske <cweiske@php.net>
   */
  class PhpGtkDoc_Search2_Index
  {
      /**
       * Reserved file name parts (module and section names).
       * Not applied at the moment; only the disabled filter in
       * buildIndexFromFiles() refers to this list.
       *
       * @var array
       */
      protected static $arReserved = array(
          'atk', 'gtk', 'gdk', 'scn', 'pango', 'method',
          'property', 'prop', 'field', 'enum', 'signal', 'constructor'
      );

      /**
       * Reserved method name parts.
       * Not applied at the moment; only the disabled filter in
       * getMethodWords() refers to this list.
       *
       * @var array
       */
      protected static $arReservedMethods = array(
          'get', 'set'
      );

      /**
       * Creates the search index and stores it serialized in the index file.
       *
       * @param string $strDocumentationDirectory Directory with the compiled manual
       * @param string $strIndexFile              File the serialized index is written to
       *
       * @return void
       *
       * @throws Exception When the documentation directory does not exist or
       *                   contains no files, or when the index file cannot be written.
       */
      public static function createIndex($strDocumentationDirectory, $strIndexFile)
      {
          //is_dir() is false for missing paths as well
          if (!is_dir($strDocumentationDirectory)) {
              throw new Exception('Documentation directory does not exist: ' . $strDocumentationDirectory);
          }

          //an existing file has to be writable itself, a new one needs a writable directory
          $bWritable = file_exists($strIndexFile)
              ? is_writable($strIndexFile)
              : is_writable(dirname($strIndexFile));
          if (!$bWritable) {
              throw new Exception('Index file is not writable: ' . $strIndexFile);
          }

          $arIndex = self::buildIndexFromFiles(
              self::getFiles($strDocumentationDirectory),
              $strDocumentationDirectory
          );

          //the write may still fail (e.g. disk full), do not report success then
          if (file_put_contents($strIndexFile, serialize($arIndex)) === false) {
              throw new Exception('Index file could not be written: ' . $strIndexFile);
          }
      }

      /**
       * Creates an index array from the given files.
       * The filenames are meant to be relative to the doc directory,
       * so that e.g. "gdk/gdk.functions.html" or
       * "gdk/gdk.gdkcolormap.method.get_screen.html" are in it.
       *
       * The index array has the following structure:
       * [keyword]
       *  - [1] priority level
       *      - [doc file 1]
       *      - [doc file 2]
       *  - [2] priority level
       *      - [doc file 1]
       *      - [doc file 2]
       *
       * Priorities written by this method:
       * 1 every file name part except the last one, and all parts of file
       *   names with only one or two parts (class names, tutorial names, ...)
       * 2 the last part of file names with more than two parts (methods,
       *   signals, enums), and all words obtained by splitting a part at "_"
       *
       * Each file is listed at most once per keyword and priority level.
       *
       * @param array  $arFiles                   Array with all the files
       * @param string $strDocumentationDirectory The directory which the file names are relative to
       *
       * @return array The index array, can be used with PhpGtkDoc_Search2
       */
      protected static function buildIndexFromFiles($arFiles, $strDocumentationDirectory)
      {
          $arIndex = array();
          $arCamelCaseWords = self::getCamelCaseWords(
              self::getTitles(
                  self::getTitleFiles($arFiles),
                  $strDocumentationDirectory
              )
          );

          foreach ($arFiles as $strFile) {
              //file name without directory and extension, split at the dots
              $arPieces = explode('.', pathinfo($strFile, PATHINFO_FILENAME));

              //remove reserved words so that they are not indexed
              //$arPieces = array_diff($arPieces, self::$arReserved);
              //when uncommenting the last line, change "$nCountWords > 2" to "$nCountWords > 1"
              $nCountWords = count($arPieces);

              //Both levels exist from the start: words split at "_" always go
              // to level 2, even when no level 2 word has been seen before.
              $arNewPieces = array(1 => array(), 2 => array());

              //array_values() keeps the positions right even with the filter above enabled
              foreach (array_values($arPieces) as $nWordPos => $strWord) {
                  $bLastWord = $nWordPos === $nCountWords - 1 && $nCountWords > 2;
                  $nPriority = $bLastWord ? 2 : 1;

                  //the word itself
                  $arNewPieces[$nPriority][] = $strWord;
                  //the parts of the camelCase title belonging to the word
                  if (isset($arCamelCaseWords[$strWord])) {
                      $arNewPieces[$nPriority] = array_merge(
                          $arNewPieces[$nPriority],
                          $arCamelCaseWords[$strWord]
                      );
                  }
                  //the parts of names connected with "_"
                  $arNewPieces[2] = array_merge(
                      $arNewPieces[2],
                      self::getMethodWords($strWord)
                  );
              }

              //append the search words to the index array
              foreach ($arNewPieces as $nPriority => $arPriorityPieces) {
                  //a word and its variations overlap, list the file only once
                  foreach (array_unique($arPriorityPieces) as $strPiece) {
                      $arIndex[$strPiece][$nPriority][] = $strFile;
                  }
              }
          }

          //sort the index | should speed up searching and is nice for debugging
          ksort($arIndex);
          return $arIndex;
      }

      /**
       * Returns the search words contained in a name connected with "_".
       * These are the single words, and for names with more than two words
       * also every pair of directly neighbouring words, so that "do_this"
       * is available for "do_this_thing".
       *
       * @param string $strWord The name to split, e.g. "get_screen"
       *
       * @return array The words; empty when the name contains no "_"
       */
      protected static function getMethodWords($strWord)
      {
          $arMethodPieces = explode('_', $strWord);
          if (count($arMethodPieces) < 2) {
              return array();
          }
          //if you want to remove "get" and "set" from the index, uncomment the following line
          //$arMethodPieces = array_values(array_diff($arMethodPieces, self::$arReservedMethods));

          $arWords = $arMethodPieces;
          $nLast   = count($arMethodPieces) - 1;
          //with only two pieces the pair would be the full name, which is indexed anyway
          if ($nLast > 1) {
              for ($nId = 0; $nId < $nLast; $nId++) {
                  $arWords[] = $arMethodPieces[$nId] . '_' . $arMethodPieces[$nId + 1];
              }
          }
          return $arWords;
      }

      /**
       * Returns an array of file names from the documentation directory.
       * The file names are relative to the doc directory.
       * The working directory is changed while searching and restored afterwards.
       *
       * @param string $strDocumentationDirectory The directory of the compiled manual
       *
       * @return array All the files in there
       *
       * @throws Exception When the directory cannot be entered or has no matching files
       */
      protected static function getFiles($strDocumentationDirectory)
      {
          $strPreviousDir = getcwd();
          //without this check a failed chdir() would list the wrong directory
          if (!chdir($strDocumentationDirectory)) {
              throw new Exception('Documentation directory is not accessible: ' . $strDocumentationDirectory);
          }

          //php-gtk-web specific
          //for a manual with subdirectories: glob('*/*.{html,php}', GLOB_BRACE)
          $arFiles = glob('*.php');

          if ($strPreviousDir !== false) {
              chdir($strPreviousDir);
          }

          //glob() returns false on errors
          if ($arFiles === false || count($arFiles) == 0) {
              throw new Exception('No files found in ' . $strDocumentationDirectory);
          }
          return $arFiles;
      }

      /**
       * Returns an array of filenames that should contain title tags
       * needed for the camelCase title splitter.
       * Array keys of the kept files are preserved.
       *
       * @param array $arFiles Array with files (@see getFiles())
       *
       * @return array Array with files that should have needed titles
       */
      protected static function getTitleFiles($arFiles)
      {
          $arTitleFiles = array();
          foreach ($arFiles as $mKey => $strFile) {
              //class files (gtk.gtktreeview.html) or enums (gtk.enum.selectionmode.html)
              if (preg_match('/^[a-z0-9]+\\.(enum\\.)?[a-z0-9]+\\.[a-z]+$/', basename($strFile))) {
                  $arTitleFiles[$mKey] = $strFile;
              }
          }
          return $arTitleFiles;
      }

      /**
       * Returns an array with the titles of the given files.
       * For ".php" files the first argument of the manualHeader() call is
       * used, for all other files the content of the html title tag.
       * Files which cannot be read or have no title are skipped.
       *
       * NOTE(review): "(.+)" is greedy in both patterns, so a line with
       * several possible end markers gives the longest match - confirm that
       * the generated pages never contain such lines.
       *
       * @param array  $arFiles                   The files to check
       * @param string $strDocumentationDirectory The directory the file names are relative to
       *
       * @return array Array of titles.
       */
      protected static function getTitles($arFiles, $strDocumentationDirectory)
      {
          $arTitles = array();
          foreach ($arFiles as $strFile) {
              //.php files (make phpweb) don't have a title header
              $strPattern = substr($strFile, -4) === '.php'
                  ? '/manualHeader\\(\"(.+)\"\\,/'
                  : '/<title>(.+)<\\/title>/';

              $strContents = file_get_contents($strDocumentationDirectory . '/' . $strFile);
              if ($strContents === false) {
                  continue;
              }
              if (preg_match($strPattern, $strContents, $arMatches)) {
                  $arTitles[] = $arMatches[1];
              }
          }
          return $arTitles;
      }

      /**
       * Splits all the titles from camelCase into several
       * words (camel and case) and adds their variations.
       * Titles containing a space are ignored.
       *
       * @param array $arTitles The titles from the files
       *
       * @return array Array with strtolower(title) => array of words and variations
       */
      protected static function getCamelCaseWords($arTitles)
      {
          $arSplit = array();
          foreach ($arTitles as $strTitle) {
              if (strpos($strTitle, ' ') !== false) {
                  //will be tutorial title or "Gtk functions"
                  //we don't want this now.
                  continue;
              }
              $arSplit[strtolower($strTitle)] = self::varyWords(
                  self::splitCamelCaseWord($strTitle)
              );
          }
          return $arSplit;
      }

      /**
       * Splits a camelCaseWord into words (camel, case, word).
       * Every upper-case letter A-Z starts a new word. Empty parts, as they
       * would appear in front of a leading upper-case letter, are left out.
       *
       * @param string $strWord The word to split
       *
       * @return array Array with lowercase words
       */
      protected static function splitCamelCaseWord($strWord)
      {
          $strWords = preg_replace('/([A-Z])/', ' \\1', $strWord);
          return preg_split('/ /', strtolower($strWords), -1, PREG_SPLIT_NO_EMPTY);
      }

      /**
       * Makes variations of an array of words.
       * E.g. array(gtk, tree, view, column) will get the variations
       * gtktree, gtktreeview, gtktreeviewcolumn, treeview, treeviewcolumn
       * and viewcolumn added to the word list.
       *
       * @param array $arWords Array with words
       *
       * @return array List with the words and their variations, each one once
       */
      protected static function varyWords($arWords)
      {
          //the loops below need the keys 0..n-1
          $arWords      = array_values($arWords);
          $nWords       = count($arWords);
          $arVariations = $arWords;

          for ($nA = 0; $nA < $nWords; $nA++) {
              //join the word at $nA with all its followers, one after the other
              $strVariation = '';
              for ($nB = $nA; $nB < $nWords; $nB++) {
                  $strVariation  .= $arWords[$nB];
                  $arVariations[] = $strVariation;
              }
          }
          return array_values(array_unique($arVariations));
      }
  }
  274. ?>