school/cs236/lexer/lexi.cpp

362 lines
10 KiB
C++

#include "lexi.h"
// Scans `data` line by line, character by character, and appends every
// recognised token to the member `tokens`.
//
// Each character is classified by determiner(); the state string it returns
// selects which helper acts on the position. The helpers overwrite the text
// they consume inside `data` (a by-value copy of the caller's lines), so the
// positions visited afterwards see blanks instead of already-tokenised text.
//
// On the first lexical error an "Error on line N" report is written to
// `file_name` and scanning stops. Otherwise the token listing is written to
// `file_name` once all lines are processed.
//
// Returns the member `tokens` in both cases.
vector<token> lexi::lexical_analyzer(vector<string> data, string file_name) {
    string current;
    string lookahead;
    for(unsigned int row = 0; row < data.size(); ++row) {
        // Helpers may shorten data[row], so its size is re-read on every pass.
        for(unsigned int col = 0; col < data[row].size(); ++col) {
            current = data[row].at(col);
            const bool has_next = col < data[row].size() - 1;
            if(has_next) {
                lookahead = data[row].at(col + 1);
            }
            else {
                lookahead.clear();
            }

            string state = determiner(current, lookahead);

            // Every helper checks `state` itself and does nothing when it does
            // not apply, so they are simply invoked in a fixed order.
            simple_state(data, state, current, lookahead, row, col);
            simple_state_string(data, state, current, lookahead, row, col);

            if(state == "start of string") {
                string literal = string_finder(data, row, col);
                if(literal == "error") {
                    write_to_file(file_name, row + 1);
                    return tokens;
                }
                // string_finder() keeps the opening quote; drop it.
                literal.erase(0, 1);
                token string_token("STRING", literal, row + 1);
                tokens.push_back(string_token);
            }

            simple_comment(data, state, current, lookahead, row, col);
            simple_id(data, state, current, lookahead, row, col);

            if(state == "error") {
                write_to_file(file_name, row + 1);
                return tokens;
            }
        }
    }
    write_to_file(file_name);
    return tokens;
}
// Writes a one-line error report, "Error on line <line>", to `file_name`.
// The file is opened with the default ofstream mode.
void lexi::write_to_file(string file_name, int line) {
    ofstream report(file_name.c_str());
    report << "Error on line " << line << endl;
    report.close();
}
// Writes the token listing to `file_name`: every element of the member
// `tokens` on its own line (streamed with operator<<), followed by a
// "Total Tokens = <count>" line.
void lexi::write_to_file(string file_name) {
    ofstream myfile;
    myfile.open(file_name.c_str());
    // Each token is terminated by a newline, the last one included, so the
    // summary below always starts on a fresh line.
    for(unsigned int i = 0; i < tokens.size(); i++) {
        myfile << tokens[i] << endl;
    }
    myfile << "Total Tokens = " << tokens.size();
    myfile << endl;
    myfile.close();
}
// Handles the "comment" state: lets comment_finder() process the comment that
// starts at column `j` of line `i`. The text it returns is discarded, so no
// token is recorded for comments. Any other `state` is ignored.
//
// `cur_string` and `next_character` are unused here.
// Always returns true.
bool lexi::simple_comment(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    const bool is_comment = (state == "comment");
    if(is_comment) {
        comment_finder(data, i, j);
    }
    return true;
}
// Handles the "id" state: extracts the identifier that starts at column `j`
// of line `i` with id_finder() and appends an "ID" token carrying line number
// i + 1 to `tokens`. Nothing is appended when id_finder() returns "error" or
// when `state` is anything other than "id".
//
// `cur_string` and `next_character` are unused here.
// Always returns true.
bool lexi::simple_id(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "id") {
        return true;
    }
    string identifier = id_finder(data, i, j);
    if(identifier == "error") {
        return true;
    }
    token id_token("ID", identifier, i + 1);
    tokens.push_back(id_token);
    return true;
}
// Handles the "simple_string" state, i.e. a position that looks like the start
// of one of the keywords Schemes / Facts / Rules / Queries.
//
// If det_type_simple_string() confirms a keyword, a token of the matching
// type (see type_simple_string()) is appended. If it answers "wrong", the
// text is treated as an ordinary identifier and an "ID" token is appended,
// unless id_finder() returns "error".
//
// `cur_string` and `next_character` are unused here.
// Always returns true.
bool lexi::simple_state_string(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "simple_string") {
        return true;
    }

    string keyword = det_type_simple_string(data, i, j);
    if(keyword != "wrong") {
        string keyword_type = type_simple_string(keyword);
        token keyword_token(keyword_type, keyword, i + 1);
        tokens.push_back(keyword_token);
        return true;
    }

    // Not a keyword after all (e.g. it only shares the first two letters).
    string identifier = id_finder(data, i, j);
    if(identifier != "error") {
        token id_token("ID", identifier, i + 1);
        tokens.push_back(id_token);
    }
    return true;
}
// Handles the "simple" state: the punctuation tokens , . ? ( ) : and :-.
//
// Appends a token whose type comes from type_simple_caps() and whose text
// comes from type_simple(), with line number i + 1, then overwrites the
// consumed characters in data[i] with a single blank. Any other `state` is
// ignored.
//
// Always returns true.
bool lexi::simple_state(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "simple") {
        return true;
    }

    string token_symbol = type_simple(cur_string, next_character);
    string token_id = type_simple_caps(cur_string, next_character);

    // Only ":-" spans two characters. A '-' that follows any other symbol
    // (",-", "(-", ...) is not part of the token and must stay in the line so
    // that it is scanned on its own instead of being silently erased.
    const string::size_type consumed = (token_symbol == ":-") ? 2 : 1;
    data[i].replace(j, consumed, " ");

    token t(token_id, token_symbol, i + 1);
    tokens.push_back(t);
    return true;
}
// Classifies the character `cur_string` (with one character of look-ahead in
// `next_character`) and returns the name of the scanner state to enter.
//
// Checks are made in this order, first match wins:
//   "simple"          - punctuation accepted by det_help_simple()
//   "simple_string"   - possible keyword start, see det_help_simple_string()
//   "id"              - an ASCII letter
//   "start of string" - a single quote
//   "comment"         - '#'
// Anything else is delegated to incorrect(), which returns "fine" for a
// blank or tab, "error" for a non-letter and " " for a letter.
string lexi::determiner(string cur_string, string next_character) {
    if(det_help_simple(cur_string, next_character)) {
        return "simple";
    }
    if(det_help_simple_string(cur_string, next_character)) {
        return "simple_string";
    }
    if(det_help_id(cur_string)) {
        return "id";
    }
    if(cur_string == "'") {
        return "start of string";
    }
    if(cur_string == "#") {
        return "comment";
    }
    return incorrect(cur_string);
}
// Returns true when the first character of `cur_string` is an ASCII letter
// (A-Z or a-z), i.e. when it can start an identifier.
bool lexi::det_help_id(string cur_string) {
    const char first = cur_string[0];
    const bool is_upper = ('A' <= first && first <= 'Z');
    const bool is_lower = ('a' <= first && first <= 'z');
    return is_upper || is_lower;
}
// Returns true only for the pair ("S", "c"), the first two letters of the
// keyword "Schemes".
bool lexi::quick_help(string a, string b) {
    return a == "S" && b == "c";
}
// Returns true when the current character and the one after it form the
// first two letters of a keyword: "Sc" (Schemes), "Qu" (Queries),
// "Ru" (Rules) or "Fa" (Facts).
bool lexi::det_help_simple_string(string cur_string, string next_character) {
    if(next_character == "u") {
        return cur_string == "Q" || cur_string == "R";
    }
    if(next_character == "a") {
        return cur_string == "F";
    }
    // Remaining candidate is "Sc".
    return quick_help(cur_string, next_character);
}
// Returns true when `cur_string` is one of the single-character punctuation
// symbols  ,  .  ?  (  )  :
//
// `next_character` is not needed for the decision; whether a ':' is followed
// by '-' is resolved later by type_simple() / type_simple_caps().
bool lexi::det_help_simple(string cur_string, string next_character) {
    return cur_string == "," || cur_string == "." || cur_string == "?" ||
           cur_string == "(" || cur_string == ")" || cur_string == ":";
}
// Fallback classification for a character that matched no token rule.
//
// Returns "fine" for a single blank or tab, "error" when the first character
// is not an ASCII letter, and " " when it is a letter.
string lexi::incorrect(string cur_string) {
    const bool is_blank = (cur_string == " " || cur_string == "\t");
    if(is_blank) {
        return "fine";
    }

    const char first = cur_string[0];
    const bool is_letter = ('A' <= first && first <= 'Z') ||
                           ('a' <= first && first <= 'z');
    if(is_letter) {
        return " ";
    }
    return "error";
}
// Extracts the identifier that starts at column `b` of line data[a].
//
// The identifier runs up to, but not including, the first character that
// is_char_valid() flags (anything that is not a letter or digit), or to the
// end of the line. The consumed text is replaced in data[a] by one blank, so
// the line gets shorter when the identifier has more than one character.
//
// Returns the identifier text, or " " when `b` is not inside the line.
string lexi::id_finder(vector<string> & data, int a, int b) {
    string & line = data[a];
    string identifier;
    for(unsigned int pos = b; pos < line.size(); ++pos) {
        identifier += line.at(pos);

        const bool at_line_end = !(pos < line.size() - 1);
        if(at_line_end || is_char_valid(line.at(pos + 1))) {
            line.replace(b, pos + 1 - b, " ");
            return identifier;
        }
    }
    return " ";
}
// Consumes the comment that starts at column `b` of line data[i] and runs to
// the end of that line.
//
// The comment text (from column `b` onwards) is replaced in data[i] by one
// blank so the scanner does not visit its characters again, and the removed
// text is returned. This also holds for a bare "#" and for "#!" at the end of
// a line: leaving such text in place would make the scanner report the '!'
// as an illegal character.
//
// Returns "error" when `b` does not lie inside the line.
string lexi::comment_finder(vector<string> & data, int i, int b) {
    string & line = data[i];
    if(b < 0 || static_cast<string::size_type>(b) >= line.size()) {
        return "error";
    }

    const string::size_type start = static_cast<string::size_type>(b);
    string comment = line.substr(start);
    line.replace(start, string::npos, " ");
    return comment;
}
// Extracts the quoted string literal in line data[a].
//
// The literal is located by searching the whole line for the first single
// quote; the incoming value of `b` is ignored. It ends at the next single
// quote on the same line. On success the literal, both quotes included, is
// replaced in data[a] by two blanks, and the text from the opening quote up
// to (not including) the closing quote is returned, so the result still
// starts with the opening quote.
//
// Returns "error" when the line has no quote or the literal is not closed on
// this line.
string lexi::string_finder(vector<string> & data, int a, int b) {
    string & line = data[a];

    const string::size_type open_quote = line.find('\'');
    if(open_quote == string::npos) {
        return "error";
    }
    const string::size_type close_quote = line.find('\'', open_quote + 1);
    if(close_quote == string::npos) {
        return "error";
    }

    string literal = line.substr(open_quote, close_quote - open_quote);
    line.replace(open_quote, close_quote - open_quote + 1, "  ");
    return literal;
}
// Maps a punctuation symbol to its token type name:
//   ","  -> "COMMA"       "."  -> "PERIOD"      "?" -> "Q_MARK"
//   "("  -> "LEFT_PAREN"  ")"  -> "RIGHT_PAREN"
//   ":"  -> "COLON_DASH" when `next_symbol` is "-", otherwise "COLON"
// Returns "" for anything else.
string lexi::type_simple_caps(string symbol, string next_symbol) {
    // Every recognised symbol is exactly one character long.
    if(symbol.size() != 1) {
        return "";
    }
    switch(symbol[0]) {
        case ',': return "COMMA";
        case '.': return "PERIOD";
        case '?': return "Q_MARK";
        case '(': return "LEFT_PAREN";
        case ')': return "RIGHT_PAREN";
        case ':':
            if(next_symbol == "-") {
                return "COLON_DASH";
            }
            return "COLON";
        default:
            return "";
    }
}
// Returns the text of the punctuation token that starts with `symbol`:
// the symbol itself for  ,  .  ?  (  )  and, for ":", either ":-" (when
// `next_symbol` is "-") or ":". Returns "" for anything else.
string lexi::type_simple(string symbol, string next_symbol) {
    if(symbol == ":") {
        if(next_symbol == "-") {
            return ":-";
        }
        return ":";
    }

    const string one_char_symbols = ",.?()";
    const bool is_one_char_symbol =
        symbol.size() == 1 && one_char_symbols.find(symbol[0]) != string::npos;
    if(is_one_char_symbol) {
        return symbol;
    }
    return "";
}
// Tries to read one of the keywords accepted by is_simple_string() starting
// at column `b` of line data[i].
//
// A keyword only counts when it stands alone: both the character before
// column `b` (if any) and the character after the keyword (if any) must be
// flagged by is_char_valid(), i.e. must not be a letter or digit. On success
// the keyword is replaced in data[i] by one blank and its text is returned.
//
// Returns "wrong" when no stand-alone keyword starts at `b`; data[i] is left
// untouched in that case.
string lexi::det_type_simple_string(vector<string> & data, int i, int b) {
    string & line = data[i];

    string before;
    if(b > 0) {
        before = line.at(b - 1);
    }

    string candidate;
    for(unsigned int pos = b; pos < line.size(); ++pos) {
        candidate += line.at(pos);

        // '!' stands in for "end of line"; is_char_valid() flags it like any
        // other non-alphanumeric character.
        const char after = (pos < line.size() - 1) ? line.at(pos + 1) : '!';

        if(is_simple_string(candidate) && is_char_valid(after) && is_char_valid(before[0])) {
            line.replace(b, pos + 1 - b, " ");
            return candidate;
        }
    }
    return "wrong";
}
// Returns true when `next_character` is NOT an ASCII letter or digit, which
// means it can end an identifier or keyword. Despite the name, a true result
// means "delimiter". The single quote is one of the characters for which true
// is returned.
bool lexi::is_char_valid(char next_character) {
    const bool is_letter = ('A' <= next_character && next_character <= 'Z') ||
                           ('a' <= next_character && next_character <= 'z');
    const bool is_digit = ('0' <= next_character && next_character <= '9');
    if(is_letter || is_digit) {
        return false;
    }
    return true;
}
// Returns true when `simple_com` is exactly one of the keywords
// "Schemes", "Facts", "Rules" or "Queries" (case-sensitive).
bool lexi::is_simple_string(string simple_com) {
    return simple_com == "Schemes" ||
           simple_com == "Facts" ||
           simple_com == "Rules" ||
           simple_com == "Queries";
}
// Maps a keyword to its token type name: "Schemes" -> "SCHEMES",
// "Facts" -> "FACTS", "Rules" -> "RULES", "Queries" -> "QUERIES".
// Returns "" for any other input.
string lexi::type_simple_string(string simple_com) {
    const char * const keywords[] = {"Schemes", "Facts", "Rules", "Queries"};
    const char * const token_types[] = {"SCHEMES", "FACTS", "RULES", "QUERIES"};
    for(int k = 0; k < 4; ++k) {
        if(simple_com == keywords[k]) {
            return token_types[k];
        }
    }
    return "";
}