Contains the source code from the coursework throughout my undergraduate Computer Engineering degree at Brigham Young University. There is a mixture of Go, Python, C, C++, Java, VHDL, Verilog, MATLAB, Bash, Assembly, etc.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexi.cpp 10.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. #include "lexi.h"
  2. vector<token> lexi::lexical_analyzer(vector<string> data, string file_name) {
  3. string cur_string;
  4. string next_character;
  5. for(unsigned int i = 0; i < data.size(); i++) {
  6. for(unsigned int j = 0; j < data[i].size(); j ++) {
  7. cur_string = data[i].at(j);
  8. if(j < data[i].size() - 1) {
  9. next_character = data[i].at(j + 1);
  10. }
  11. else {
  12. next_character = "";
  13. }
  14. string state = determiner(cur_string, next_character);
  15. simple_state(data, state, cur_string, next_character, i, j);
  16. simple_state_string(data, state, cur_string, next_character, i, j);
  17. if(state == "start of string") {
  18. string token_symbol = string_finder(data, i, j);
  19. if(token_symbol != "error") {
  20. token_symbol.erase(0,1);
  21. token t("STRING", token_symbol, i + 1);
  22. tokens.push_back(t);
  23. }
  24. else {
  25. write_to_file(file_name, i + 1);
  26. return tokens;
  27. }
  28. }
  29. simple_comment(data, state, cur_string, next_character, i, j);
  30. simple_id(data, state, cur_string, next_character, i, j);
  31. if(state == "error") {
  32. write_to_file(file_name, i + 1);
  33. return tokens;
  34. }
  35. }
  36. }
  37. write_to_file(file_name);
  38. return tokens;
  39. }
  40. void lexi::write_to_file(string file_name, int line) {
  41. ofstream myfile;
  42. myfile.open(file_name.c_str());
  43. myfile << "Error on line " << line << endl;
  44. myfile.close();
  45. }
  46. void lexi::write_to_file(string file_name) {
  47. ofstream myfile;
  48. myfile.open(file_name.c_str());
  49. for(unsigned int i = 0; i < tokens.size(); i++) {
  50. if(i < tokens.size()) {
  51. myfile << tokens[i] << endl;
  52. }
  53. else {
  54. myfile << tokens[i];
  55. }
  56. }
  57. myfile << "Total Tokens = " << tokens.size();
  58. myfile << endl;
  59. myfile.close();
  60. }
  61. bool lexi::simple_comment(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
  62. if(state == "comment") {
  63. string token_symbol = comment_finder(data, i, j);
  64. }
  65. return true;
  66. }
  67. bool lexi::simple_id(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
  68. if(state == "id") {
  69. string token_symbol = id_finder(data, i, j);
  70. if(token_symbol != "error") {
  71. token t("ID", token_symbol, i + 1);
  72. tokens.push_back(t);
  73. }
  74. }
  75. return true;
  76. }
  77. bool lexi::simple_state_string(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
  78. if(state == "simple_string") {
  79. string token_symbol = det_type_simple_string(data, i, j);
  80. if(token_symbol != "wrong") {
  81. string token_type = type_simple_string(token_symbol);
  82. token t(token_type, token_symbol, i + 1);
  83. tokens.push_back(t);
  84. }
  85. else {
  86. string token_symbol = id_finder(data, i, j);
  87. if(token_symbol != "error") {
  88. token t("ID", token_symbol, i + 1);
  89. tokens.push_back(t);
  90. }
  91. }
  92. }
  93. return true;
  94. }
  95. bool lexi::simple_state(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
  96. if(state == "simple") {
  97. string token_symbol = type_simple(cur_string, next_character);
  98. if(next_character == "-") {
  99. data[i].replace(j,2, " ");
  100. }
  101. else {
  102. data[i].replace(j,1, " ");
  103. }
  104. string token_id = type_simple_caps(cur_string, next_character);
  105. token t(token_id, token_symbol, i + 1);
  106. tokens.push_back(t);
  107. }
  108. return true;
  109. }
  110. string lexi::determiner(string cur_string, string next_character) {
  111. if(det_help_simple(cur_string, next_character)) {
  112. return "simple";
  113. }
  114. else if(det_help_simple_string(cur_string, next_character)) {
  115. return "simple_string";
  116. }
  117. else if(det_help_id(cur_string)) {
  118. return "id";
  119. }
  120. else if(cur_string == "'") {
  121. return "start of string";
  122. }
  123. else if(cur_string == "#") {
  124. return "comment";
  125. }
  126. else {
  127. string temp = incorrect(cur_string);
  128. return temp;
  129. }
  130. return "";
  131. }
  132. bool lexi::det_help_id(string cur_string) {
  133. if(('A' <= cur_string[0] && cur_string[0] <= 'Z') ||
  134. ('a' <= cur_string[0] && cur_string[0] <= 'z')) {
  135. return true;
  136. }
  137. return false;
  138. }
  139. bool lexi::quick_help(string a, string b) {
  140. if(a == "S" && b == "c") {
  141. return true;
  142. }
  143. return false;
  144. }
  145. bool lexi::det_help_simple_string(string cur_string, string next_character) {
  146. if(quick_help(cur_string, next_character)) {
  147. return true;
  148. }
  149. else if((cur_string == "Q" && next_character == "u")) {
  150. return true;
  151. }
  152. else if((cur_string == "R" && next_character == "u")) {
  153. return true;
  154. }
  155. else if((cur_string == "F" && next_character == "a")) {
  156. return true;
  157. }
  158. return false;
  159. }
  160. bool lexi::det_help_simple(string cur_string, string next_character) {
  161. if(cur_string == "," || cur_string == "." || cur_string == "?" ||
  162. cur_string == "(" || cur_string == ")" || cur_string == ":") {
  163. type_simple(cur_string, next_character);
  164. return true;
  165. }
  166. return false;
  167. }
  168. string lexi::incorrect(string cur_string) {
  169. if(cur_string == " " || cur_string == "\t") {
  170. return "fine";
  171. }
  172. else if(!(('A' <= cur_string[0] && cur_string[0] <= 'Z') ||
  173. ('a' <= cur_string[0] && cur_string[0] <= 'z'))) {
  174. return "error";
  175. }
  176. return " ";
  177. }
  178. string lexi::id_finder(vector<string> & data, int a, int b) {
  179. string cur_string;
  180. string next_character;
  181. for(unsigned int j = b; j < data[a].size(); j++) {
  182. cur_string += data[a].at(j);
  183. if(j < data[a].size() - 1) {
  184. next_character = data[a].at(j + 1);
  185. }
  186. else {
  187. next_character = "!";
  188. }
  189. if(is_char_valid(next_character[0]) || next_character == "!") {
  190. data[a].replace(data[a].begin() + b, data[a].begin() + j + 1, " ");
  191. return cur_string;
  192. }
  193. }
  194. return " ";
  195. }
  196. string lexi::comment_finder(vector<string> & data, int i, int b) {
  197. string cur_string;
  198. string next_character;
  199. for(unsigned int j = b; j < data[i].size(); j++) {
  200. cur_string += data[i].at(j);
  201. if(j < data[i].size() - 1) {
  202. next_character = data[i].at(j + 1);
  203. }
  204. else {
  205. next_character = "!";
  206. }
  207. if((j > data[i].size()) - 1 && next_character != "!") {
  208. data[i].replace(data[i].begin() + b, data[i].end(), " ");
  209. return cur_string;
  210. }
  211. }
  212. return "error";
  213. }
  214. string lexi::string_finder(vector<string> & data, int a, int b) {
  215. string cur_string;
  216. string next_character;
  217. b = data[a].find('\'');
  218. for(unsigned int j = b; j < data[a].size(); j++) {
  219. cur_string += data[a].at(j);
  220. if(j < data[a].size() - 1) {
  221. next_character = data[a].at(j + 1);
  222. }
  223. if(next_character == "'") {
  224. data[a].replace(data[a].begin() + b, data[a].begin() + j + 2, " ");
  225. data[a].insert(data[a].begin() + b, ' ');
  226. return cur_string;
  227. }
  228. }
  229. return "error";
  230. }
  231. string lexi::type_simple_caps(string symbol, string next_symbol) {
  232. if(symbol == ",") {
  233. return "COMMA";
  234. }
  235. else if(symbol == ".") {
  236. return "PERIOD";
  237. }
  238. else if(symbol == "?") {
  239. return "Q_MARK";
  240. }
  241. else if(symbol == "(") {
  242. return "LEFT_PAREN";
  243. }
  244. else if(symbol == ")") {
  245. return "RIGHT_PAREN";
  246. }
  247. else if(symbol == ":") {
  248. if(next_symbol == "-") {
  249. return "COLON_DASH";
  250. }
  251. return "COLON";
  252. }
  253. return "";
  254. }
  255. string lexi::type_simple(string symbol, string next_symbol) {
  256. if(symbol == ",") {
  257. return ",";
  258. }
  259. else if(symbol == ".") {
  260. return ".";
  261. }
  262. else if(symbol == "?") {
  263. return "?";
  264. }
  265. else if(symbol == "(") {
  266. return "(";
  267. }
  268. else if(symbol == ")") {
  269. return ")";
  270. }
  271. else if(symbol == ":") {
  272. if(next_symbol == "-") {
  273. return ":-";
  274. }
  275. return ":";
  276. }
  277. return "";
  278. }
  279. string lexi::det_type_simple_string(vector<string> & data, int i, int b) {
  280. string cur_string;
  281. string next_character;
  282. string special_case;
  283. if(b > 0) {
  284. special_case = data[i].at(b -1);
  285. }
  286. for(unsigned int j = b; j < data[i].size(); j++) {
  287. cur_string += data[i].at(j);
  288. if(j < data[i].size() - 1) {
  289. next_character = data[i].at(j + 1);
  290. }
  291. else {
  292. next_character = "!";
  293. }
  294. if((is_simple_string(cur_string)) && (is_char_valid(next_character.at(0))) && (is_char_valid(special_case[0]))) {
  295. data[i].replace(data[i].begin() + b, data[i].begin() + j + 1, " ");
  296. return cur_string;
  297. }
  298. }
  299. return "wrong";
  300. }
  301. bool lexi::is_char_valid(char next_character) {
  302. if(!(('A' <= next_character && next_character <= 'Z') ||
  303. ('a' <= next_character && next_character <= 'z') ||
  304. ('0' <= next_character && next_character <= '9')) || (next_character == '\'')) {
  305. return true;
  306. }
  307. return false;
  308. }
  309. bool lexi::is_simple_string(string simple_com) {
  310. if(simple_com == "Schemes") {
  311. return true;
  312. }
  313. else if(simple_com == "Facts") {
  314. return true;
  315. }
  316. else if(simple_com == "Rules") {
  317. return true;
  318. }
  319. else if(simple_com == "Queries") {
  320. return true;
  321. }
  322. return false;
  323. }
  324. string lexi::type_simple_string(string simple_com) {
  325. if(simple_com == "Schemes") {
  326. return "SCHEMES";
  327. }
  328. else if(simple_com == "Facts") {
  329. return "FACTS";
  330. }
  331. else if(simple_com == "Rules") {
  332. return "RULES";
  333. }
  334. else if(simple_com == "Queries") {
  335. return "QUERIES";
  336. }
  337. return "";
  338. }