正文

再展出一个解释程序(暨庆祝偶滴BLOG点击次数超过10万)2008-08-20 19:44:00

【评论】 【打印】 【字体: 】 本文链接:http://blog.pfan.cn/sgpro/37724.html

分享到:

  发现2006年3月写的COBOL语言解释程序在偶滴BLOG所有文章中人气最高,偶滴blog点击次数刚刚超过10万,为了祝贺。时隔2年半后再展示一个解释程序,html解释程序。html解释程序是 易扩网络搜索(2007年本人自主开发的搜索引擎)的一个小部分, 事实上为了网络搜索而使用整套解释程序是用高射炮打蚊子的行为,用DOM和regx应该能够实现,当时有这个想法,不过没有付诸行动。 下面是解释系统的框架图 Copyright (C) 2007-8 SGPRO 这个程序比较庞大,所以这里只能展示语法分析部分。 HTML源代码虽然有一套语法规则,但是仍然可以写的相当随意,所以解释程序容错能力要很高,如果是xml语法非常严格,相对应的解释程序应该简单的多(虽然我没有实现过) 语法分析(Syntax Analysis)的代码,不能单独编译,需要词法分析和错误处理等模块协作 下面2个数据结构必须说明  //1、 有限自动机,从html源代码中读取有效单词,目前只支持2种,一种是HTML有效单词,一种是Javascript有效单词 typedef struct  { BOOL (*ReadHTMLToken)(Token *argToken, BOOL filterSpace); BOOL (*ReadJavaScriptToken)(Token *argToken, BOOL filterSpace);}LangDFA; // 2、语法解释器typedef struct{ HTMLObjectTree ObjectTree;   // 对象树,属于解释器的目标生成对象 ObjectList     ObjectArr;              // 对象列表, 对象树的列表结构 HTMLInterpreterError *IntpErr;    // 错误对象句柄 BOOL (*start)(BOOL (*LoadHTMLCode)(char *),   // 启动接口      char filename[],      Log_Type  LogInfor,      int  (*LogFunc)(char strLog[])); HTMLInterpreterError(*GetInterpreterError)();  // 错误处理接口}HTMLInterpreter; //generate the object tree and summary array #include "sgpro_htmlInterpret_io.h"#include "sgpro_htmlInterpret_common.h"#include "sgpro_htmlInterpret_keywords.h"#include "sgpro_htmlInterpret_codebuffer.h" #include "sgpro_htmlInterpret_objecttree.h"#include "sgpro_htmlInterpret_errorhandle.h"#include "sgpro_htmlInterpret.h" #include "comstack.h"    // 公用栈#include "sgpro_abstractstack.h"   // 抽象堆栈 Token  CurrentToken;Stack *ObjectStack; AbstractStack   *ObjTreeStack; HTMLObjectTree ObjectTree;HTMLObjectTree hCurrent;ObjectList*    ObjectArr; char   title[1024];Token  *href;Token  *summary;int    (*WriteSummary)(STRING);int    (*WriteObject)(STRING);int    (*WriteInfor)(STRING); extern HTMLCodeBuffer CodeBuff;    extern LangDFA DFA;     extern HTMLInterpreterError *globalError; extern char CurrentChar; BOOL StmtEndOfFront = false; HTMLInterpreter* CreateHTMLInterpreter(){ int i = 0;  HTMLInterpreter* objHTMLIntp =   (HTMLInterpreter* )malloc(sizeof(HTMLInterpreter)); if ( objHTMLIntp == NULL) {  return objHTMLIntp; }  //Init Error objHTMLIntp->IntpErr = new_HTMLInterperterError(); if (objHTMLIntp->IntpErr == NULL) {  return NULL; } else {  globalError = objHTMLIntp->IntpErr; }  //Init global object array handle ObjectArr = &(objHTMLIntp->ObjectArr);  //Init size ObjectArr->ObjectArrSize = DEF_OBJLIST_SIZE; //Init Current pointer ObjectArr->CurrentObjPointer = 0; //Init  every element of object data array for ( i = 0; i < ObjectArr->ObjectArrSize; i++ ) {  if (!InitHTMLObjectTreeNode(ObjectArr->data + i))  {   SetInterpreterError(ErrType_CreateErr,     "Create Object node error");   return NULL;  } } strcpy(ObjectArr->data[ObjectArr->CurrentObjPointer++].objName,   "sgpro_htmlInterpreter_objectList");   objHTMLIntp->start = HTMLInterpret; objHTMLIntp->GetInterpreterError = GetInterpreterError;  return objHTMLIntp;} BOOL InitHTMLInterpreter(HTMLInterpreter* objHTMLIntp){ int i = 0;  if (objHTMLIntp == NULL) {  return false; }  memset(objHTMLIntp, 0x00, sizeof(objHTMLIntp));  //Init Error objHTMLIntp->IntpErr = new_HTMLInterperterError(); if (objHTMLIntp->IntpErr == NULL) {  return false; } else {  globalError = objHTMLIntp->IntpErr; }  //Init global object array handle ObjectArr = &(objHTMLIntp->ObjectArr);  //Init size ObjectArr->ObjectArrSize = DEF_OBJLIST_SIZE;  ObjectArr->data[0].AttribArrsize = ATTRIBARRSIZE;  //Init  every element of object data array for ( i = 1; i < ObjectArr->ObjectArrSize; i++ ) {  ObjectArr->data[i] = ObjectArr->data[0]; }   objHTMLIntp->start = HTMLInterpret; objHTMLIntp->GetInterpreterError = GetInterpreterError;  return true;} static BOOL HTMLInterpret(BOOL     (*LoadHTMLCode)(char *),       char     filename[],       Log_Type LogInforType,       int      (*LogFunc)(char strLog[])){  BOOL result = false;  if (!CreateToken(&CurrentToken)) {  SetInterpreterError(ErrType_CreateErr,    "Create token error");  return result; } if (!CreateStack(&ObjectStack, CreateToken, 1024)) {  SetInterpreterError(ErrType_CreateErr,    "Create stack error");  return result; }  LogSetting(LogInforType, LogFunc);  if (LoadHTMLCode == NULL) {  CodeBuff.LoadHTMLCode = LoadHTMLCodeByFile;  } else {  CodeBuff.LoadHTMLCode = LoadHTMLCode; }  if (CodeBuff.LoadHTMLCode(filename)) {  if (result = Syntax_Analysis())  {   ShowInfor("\nOK!\n");  }  else  {   ShowInfor("Error Code: %02d\n, Error Message",        GetInterpreterErrorCode(),    GetInterpreterErrorString());    ShowInfor("\nFail!\n");  } }  CloseAllFile();  DestroyStack(&ObjectStack); DestroyToken(&CurrentToken);  return result;} BOOL Syntax_Analysis(){ if (!DFA.ReadHTMLToken(&CurrentToken, true)) {  SetInterpreterError(ErrType_SyntaxErr,  "No Token can be loaded");  return false; } if (!HTMLPage() && CurrentToken.type != null) {  return false; } if (!StackEmpty(*ObjectStack)) {  ShowInfor("Object stack not clear(Last object string:%s) when terminal the analyse!\n",    GetTop(*ObjectStack).string);   if (SetInterpreterError(ErrType_StackErr, ""))  {   sprintf(globalError->message,     "Object stack not clear(Last object string:%s) when terminal the analyse!\n",     GetTop(*ObjectStack).string);  }  return false; } return true;} BOOL HTMLPage(){ if (CurrentToken.type == MarkStart ||   CurrentToken.type == Summary ||  CurrentToken.type == String ||  CurrentToken.type == Space ||  CurrentToken.type == Atom) {  if  (!HTMLStatement(CurrentToken, NodeType_Sub))  {   return false;  }   if ((CurrentToken.type == MarkStart ||   CurrentToken.type == Summary ||   CurrentToken.type == String ||   CurrentToken.type == Space ||   CurrentToken.type == Atom) &&   !HTMLPage())  {   return false;  } } return true;} BOOL HTMLStatement(Token argTK, NodeType type){ BOOL   SummaryStatement = false;  switch (argTK.type) { case Summary: case String: case Space: case Atom:  if (!HTMLSummary(argTK))// statement => SummaryArr  {   return false;  }  else  {   SummaryStatement = true;   ShowInfor("\n");  }  break; case MarkStart: //<  if (!DFA.ReadHTMLToken(&CurrentToken, true))  //filter the < bettween the next token  {   ShowInfor("Repected: HTML Mark, No token can load.");   SetInterpreterError(ErrType_SyntaxErr, "Repected: HTML Mark, No token can load");   return false;  }    if (CurrentToken.type == Summary) //<summary  {   if (!HTMLObjectFront(CurrentToken, type)) // <object | <object/> | <object>   {    return false;   }    switch (CurrentToken.type) // > | /> | >   {   case Atom:    if (IsHTMLMarkEnd(CurrentToken.string[0])) //<object>    {     if (!StackEmpty(*ObjectStack) && !strcmp(GetTop(*ObjectStack).string, "SCRIPT"))     {         if (JavaScript(CurrentToken))     // javascript      {       DFA.ReadHTMLToken(&CurrentToken, true);  // </      }      else      {       return false;      }           }     else     {      DFA.ReadHTMLToken(&CurrentToken, true);            if (!StackEmpty(*ObjectStack) && IsFilter(GetTop(*ObjectStack).string) )        //Filter objects      {       //DFA.ReadHTMLToken(&CurrentToken, true);        while (CurrentToken.type != MarkEndStart && DFA.ReadHTMLToken(&CurrentToken, true))       {        //DFA.ReadHTMLToken(&CurrentToken);        if (CurrentToken.type == MarkEndStart )        {         if (DFA.ReadHTMLToken(&CurrentToken, true))         {          if (!strcmp(GetTop(*ObjectStack).string, strupr(CurrentToken.string)))          {           CodeBuff.RollBackReadPointer();           CodeBuff.RollBackReadPointer();           //CodeBuff.RollBackReadPointer();                    //CodeBuff.RollBackReadPointer();                              CurrentChar = CodeBuff.ReadChar();           DFA.ReadHTMLToken(&CurrentToken, true);           break;          }         }         else         {          ShowInfor("End scan but not match the object :%s\n",            GetTop(*ObjectStack).string);           if (SetInterpreterError(ErrType_SyntaxErr, ""))          {           sprintf(globalError->message,             "End scan but not match the object :%s\n",             GetTop(*ObjectStack).string);          }          }        }       }      }     }      switch (CurrentToken.type) //<object>     {     case MarkEndStart:// <object></      if (!HTMLObjectRear(CurrentToken, type)) //<object></object>      {       return false;      }      StmtEndOfFront = false;      break;// statement => <object></object>     default:            if (!HTMLStatement(CurrentToken,        type))//<object><statement>      {       return false;      }        switch (CurrentToken.type) //<object><statement></ | summary |      {      case MarkEndStart:  //<object><statement></       if (!HTMLObjectRear(CurrentToken, type)) //<object><statement></object>       {        return false;       }        break;      }//<object><statement></object>     }    }    break;   case MarkEndObject:  //<object />    if (!DFA.ReadHTMLToken(&CurrentToken, true))    {     return false;    }    break;    //2007-8-25 break object by <   case MarkStart:    return true;   default:    return false;   }  } }// end statement  if (CurrentToken.type == Summary ||   CurrentToken.type == MarkStart ||  CurrentToken.type == String ||  CurrentToken.type == Space ||  CurrentToken.type == Atom) {  if (!HTMLStatement(CurrentToken,    SummaryStatement? type:NodeType_Next))  //statement | statements  {   return false;  } } return true;} BOOL HTMLObjectFront(Token argTK, NodeType type){ Token stackRet;  static int   Root = 1; int i = 0;  if (argTK.type == Summary) //<name  {  strupr(argTK.string);  if (Push(ObjectStack, CopyToken, argTK))  {   strcpy(ObjectArr->data[ObjectArr->CurrentObjPointer].objName,    argTK.string);   ShowInfor("Start Object: %s\n", GetTop(*ObjectStack).string);  }  else  {   ShowInfor("Object Stack overflow.\n");   SetInterpreterError(ErrType_StackErr, "Object Stack overflow");   return false;  }  //CurrentToken.string;   Object Name; }  DFA.ReadHTMLToken(&CurrentToken, true); switch (CurrentToken.type) {  case Summary: //< name attributes   if (!ObjectAttributes(CurrentToken))    {    return false;   }   break; } //<name attributes /> | <name attributes> | <name> | <name/>  switch (CurrentToken.type) // > | /> {  case Atom:  //case MarkEndObject:   if (IsHTMLMarkEnd(CurrentToken.string[0])) // <name> | <name attributes >   {    if (IsAloneObject(GetTop(*ObjectStack).string))    {// ObjectFront => <BR> | <INPUT>     if (Pop(ObjectStack, &stackRet))     {      ShowInfor("End Object: %s\n", stackRet.string);     }     else     {      ShowInfor("Object Stack empty.\n");      SetInterpreterError(ErrType_StackErr, "Object Stack empty");      return false;     }    }// ObjectFront => <object>       }   break;  case MarkEndObject:    if (Pop(ObjectStack, &stackRet))   {    ShowInfor("End Object: %s\n", stackRet.string);   }   else   {    ShowInfor("Object Stack empty.\n");    SetInterpreterError(ErrType_StackErr, "Object Stack empty");    return false;   }    //<object />   break;   // <name />  case MarkStart:   //break the object 2007-8-25   return true;  default:   return false; }  if (ObjectArr->CurrentObjPointer + 1 >= ObjectArr->ObjectArrSize) {  //ObjectArr->data = (struct _HTMLObjectNode *)realloc(ObjectArr->data, ObjectArr->ObjectArrSize + DEF_OBJLIST_INC);   if (ObjectArr->data == NULL)  {   SetInterpreterError(ErrType_CreateErr,     "Create Object array data error");   return false;  }   //Init  every element of object data array  for ( i = 0; i < DEF_OBJLIST_INC; i++ )  {   if (!InitHTMLObjectTreeNode(ObjectArr->data + ObjectArr->ObjectArrSize + i))   {    SetInterpreterError(ErrType_CreateErr,      "Create Object node error");    return false;   }  }   ObjectArr->ObjectArrSize += DEF_OBJLIST_INC; } ObjectArr->CurrentObjPointer++; return true;} BOOL ObjectAttributes(Token argTK){ if (!AttributeNode(argTK))  //<name attrib {  return false; }  FilterSpace(); switch (CurrentToken.type) { case Summary:  if (!ObjectAttributes(CurrentToken)) // <name attribs...  {   return false;  }  break; case Atom:  while (!IsHTMLMarkEnd(CurrentToken.string[0])) // <name attribs...>  {   ShowInfor("Warning: Repected \">\" Current: %s\n", CurrentToken.string);    //sprintf(globalError->message,    // "Repected: \">\" Current: %s\n", CurrentToken.string);    //SetInterpreterError(ErrType_SyntaxErr, globalError->message);   DFA.ReadHTMLToken(&CurrentToken, true);   //return false;  }  break; case MarkEndObject: //<name attribs... />  break; default:  if (CurrentToken.type != MarkStart)  {   ShowInfor("Warning: Repected \">\" \nCurrent:%s\n", CurrentToken.string);   if (SetInterpreterError(ErrType_SyntaxErr, ""))   {    sprintf(globalError->message,      "Repected: \">\" \nCurrent:%s\n", CurrentToken.string);   }   //return false;  } } return true;} BOOL AttributeNode(Token argTK){ // attributename char name[ATTRIBNAMELEN] = ""; char value[ATTRIBVALUELEN] = ""; int  iObjCurPnt    = ObjectArr->CurrentObjPointer; int  iAttribCurPnt = ObjectArr->data[iObjCurPnt].AttribArrCurrentPointer;  //assign to object tree node ShowInfor("AttriName: %s, ", CurrentToken.string); strcpy(ObjectArr->data[iObjCurPnt].AttribArr[iAttribCurPnt].name,   strupr(CurrentToken.string));   //strcpy(name, CurrentToken.string);  if (!DFA.ReadHTMLToken(&CurrentToken, true)) {  return false; }  switch (CurrentToken.type) { case Atom:  if (!IsEqual(CurrentToken.string[0])) //  attributename =   {   if (IsHTMLMarkEnd(CurrentToken.string[0])) // attributename    {       ShowInfor("Value None.\n");    break;   }   return false;  }   DFA.ReadHTMLToken(&CurrentToken, true);  do  {   if (CurrentToken.type == Space ||  // ' '    CurrentToken.string[0] == '>'  || // >    CurrentToken.type == MarkEndObject || // />    CurrentToken.type == MarkStart ||  // <    CurrentToken.type == MarkEndStart) // </   {    //printf("debug-type");    break;       }   else   {    strcat(value, CurrentToken.string);   }  } while (DFA.ReadHTMLToken(&CurrentToken, false) );   if (strlen(value))  {   ShowInfor( "value: %s\n", value);   strcpy(ObjectArr->data[iObjCurPnt].AttribArr[iAttribCurPnt].value,       value);    ObjectArr->data[iObjCurPnt].AttribArrCurrentPointer++;   if (ObjectArr->data[iObjCurPnt].AttribArrCurrentPointer >    ATTRIBARRSIZE)   {    ShowInfor( "Overflow: attributes array too long!\n");  // =?    SetInterpreterError(ErrType_CreateErr, "Overflow: attributes array too long!");    return false;   }  }  else  {   ShowInfor( "Repected: attribute value!\n");  // =?   SetInterpreterError(ErrType_StackErr, "Repected: attribute value");   return false;  }    break;  // No Value attribute node case Summary: case MarkEndObject:  ShowInfor("Value None.\n");  break; default:    return false; }  return true;} BOOL HTMLObjectRear(Token argTK, NodeType type){ int   index = 0; Token stackRet;  if (argTK.type != MarkEndStart) {  return false; } if (DFA.ReadHTMLToken(&CurrentToken, true) && CurrentToken.type == Summary) {  //object end  if (!IsAloneObject(CurrentToken.string))  {   if (!StackEmpty(*ObjectStack))   {        if (!strcmp(strupr(CurrentToken.string), GetTop(*ObjectStack).string))    {     Pop(ObjectStack, &stackRet);     ShowInfor("End Object: %s\n", stackRet.string);     }    else    {          ShowInfor( "[Warning] Current Object :%s Incompatiable Stack Object: %s, Warning has be passed.\n",       CurrentToken.string, GetTop(*ObjectStack).string);     //2007-06-17 Add     index = IndexOfStack(*ObjectStack, CurrentToken, TokenEqual);     if (index == -1)  // have not the object front, throw the object rear     {      //     }     else      // front of the object have not starting, pop them.     {      while (Pop(ObjectStack, &stackRet))      {       if (!strcmp(strupr(CurrentToken.string), stackRet.string))       {                ShowInfor( "End Object: %s\n", stackRet.string);        break;       }             }     }    }   }   else   {    ShowInfor( "Object Stack empty.\n");    SetInterpreterError(ErrType_StackErr, "Object Stack empty");    return false;   }  }  //</object   while (DFA.ReadHTMLToken(&CurrentToken, true) &&    !IsHTMLMarkEnd(CurrentToken.string[0]));  {   DFA.ReadHTMLToken(&CurrentToken, true);   // ObjectRear => </Object>   return true;  } }    ShowInfor("Repected: HTML object end mark. Current: %s\n",CurrentToken.string); if (SetInterpreterError(ErrType_StackErr, "")) {  sprintf(globalError->message,    "Repected: HTML object end mark. Current: %s\n",CurrentToken.string); }  return false;} BOOL HTMLSummary(Token argTK){ //HTMLObjectTreeNode *p = AbstStackGetPop(ObjectTreeStack); HTMLObjectTreeNode *p; int sumlen = 0; int arglen = strlen(argTK.string);  p = ObjectArr->data + ObjectArr->CurrentObjPointer - 1;  if (argTK.type != MarkStart &&  argTK.type != MarkEndStart &&  argTK.type != null)  //SummaryArr => summary* {  if (!strcmp(argTK.string, "&nbsp;"))  {   strcpy(argTK.string ," ");  }   sumlen = strlen(p->summary);   if(sumlen == 0)  {   p->summary = (char*)malloc(arglen*sizeof(char));   memset(p->summary, '\0', arglen);  }  else  {   p->summary = (char*)realloc(p->summary, (sumlen + arglen) * sizeof(char));   memset(p->summary + sumlen, '\0', arglen);  }  strcat(p->summary, argTK.string);     ShowInfor( "%s", argTK.string);  if (!DFA.ReadHTMLToken(&CurrentToken, true))  {   return true;  }  HTMLSummary(CurrentToken);   } else {  return false; } return true;} void FilterSpace(){ if (CurrentToken.type == Space) {  DFA.ReadHTMLToken(&CurrentToken, true); }} BOOL JavaScript(Token argTK){ BOOL jsarr = true; BOOL ret = false;  do {  while (jsarr)  {   jsarr = jsarr && DFA.ReadJavaScriptToken(&CurrentToken, true);  }   DFA.ReadHTMLToken(&CurrentToken, true);   if (CurrentToken.type == MarkEndStart)  {   if (DFA.ReadHTMLToken(&CurrentToken, true))   {    if (!strcmp(strupr(CurrentToken.string), "SCRIPT"))    {     CodeBuff.RollBackReadPointer();     CodeBuff.RollBackReadPointer();     CurrentChar = CodeBuff.ReadChar();     ret = true;     break;    }   }  }  jsarr = true; } while (ret == false);  return ret;}   下面是以html解释程序为内核的 WIN32应用程序 GUI, 左边是html源代码,右边是解释器输出的解释日志,追踪了对象数组的生成过程。

阅读(2110) | 评论(1)


版权声明:编程爱好者网站为此博客服务提供商,如本文牵涉到版权问题,编程爱好者网站不承担相关责任,如有版权问题请直接与本文作者联系解决。谢谢!

评论

loading...
您需要登录后才能评论,请 登录 或者 注册