// cleanup-yahoo.cpp

 
/******************************************************************/
/*                                                                */
/*  This cleans up HTML files downloaded from Yahoo! Finance      */
/*                                                                */
/*  Date File Created:     1/01/2003                              */
/*                                                                */
/*  Date Last Modified:    13/01/2003                             */
/*                                                                */
/*   Permission of distribution is granted, provided no           */
/*       alternation, commenting or modification is made          */
/*       towards any parts of the source code and the             */
/*       binary code.                                             */
/*                                                                */
/******************************************************************/

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/*
 * Forward declarations of the helper routines used by main().
 * Their definitions are not visible in this part of the file, so the
 * notes below record only how main() calls each one; anything about
 * their internal behaviour is marked as an assumption to confirm.
 */

/* Line reader: main() calls fggets(line, 5000, inf) once per pass of its
   read loop and then inspects the contents of `line`.
   NOTE(review): return value is ignored by main(); confirm what it signals. */
char * fggets(char *buf, int bsize, FILE *fp);

/* main() compares the result with 1: when equal, the line is written
   followed by a space instead of a newline. */
int isTradeTerm(char *buf);

/* Not called in the visible part of main().
   Presumably converts buf to upper case in place -- TODO confirm. */
void uprStr(char *buf);

/* Not called in the visible part of main().
   Presumably converts buf to lower case in place -- TODO confirm. */
void lwrStr(char *buf);

/* Called on the high-price, low-price and volume buffers before they are
   later handed to atof(); N0 is passed either `line_broken` or 0.
   NOTE(review): meaning of N0 is not visible here -- confirm. */
void numericString(char *buf, int N0);

/* Called as packString(line, 10) on lines that contain "Change".
   NOTE(review): meaning of N0 is not visible here -- confirm. */
void packString(char *buf, int N0);

/* Result is compared against 12 for the characters ',', '+' and '%'.
   Presumably the number of occurrences of cc in buf -- TODO confirm. */
int CharCount(char *buf, char cc);

/* Called on every line while main()'s `in_content` flag is set, before
   the line is filtered.  Presumably strips HTML tags -- TODO confirm. */
void untagBuf(char *buf);

/* Result is compared numerically by main() (> 3, == 18, == 15).
   Presumably the count of non-blank characters in buf -- TODO confirm. */
int nonBlankChar(char *buf);

/* Called on `line` immediately before lTrim().
   Presumably removes trailing whitespace in place -- TODO confirm. */
void rTrim(char *buf);

/* Called on `line` immediately after rTrim().
   Presumably removes leading whitespace in place -- TODO confirm. */
void lTrim(char *buf);

void main(int argc, char ** argv)
{
  FILE * inf;
  FILE * outf;
  char * command;
  int line_broken = 0;
  double mid;
  double vol;
  double sum;

  //
  // Begin :: statically allocate variables in the memory
  //
  char command_buf[500];
  int  in_content = 0;
  int  line_length = 0;

  char * line;
  char line_buf[5000];

  char * prev_line;
  char prev_line_buf[5000];

  char * new_name;
  char name_buf[500];

  char * pl;
  char * pc;

  char * high_price;
  char hi_price_buf[1500];

  char * low_price;
  char lo_price_buf[1500];

  char * daily_volume;
  char volume_buf[1500];
  //
  // End :: statically allocate variables in the memory
  //
 
  /* prompt usage */
  if ((argc !=2))
    {
      printf("\nUsage:");
      printf("\n    %s file-name\n", argv[0]);
      exit (0);
    }

  //
  // Begin :: set buffers for string pointers
  //
  command = (char *) (&command_buf);
  line = (char *) (&line_buf);
  prev_line = (char *) prev_line_buf;
  new_name = (char *) (&name_buf);
  high_price = (char *) (&hi_price_buf);
  low_price = (char *) (&lo_price_buf);
  daily_volume = (char *) (&volume_buf);
  //
  // End :: set buffers for string pointers
  //
 
  //
  // Begin :: check input file existence
  //
  if ((inf = fopen(argv[1], "r")) == NULL)
    {
      puts(" ");
      puts("File not found.");
      puts(" ");
      exit(0);
    }
  fclose(inf);
  //
  // End :: check input file existence
  //
 
  //
  // Begin :: decide the name for output file
  //
  strcpy(new_name, argv[1]); 
  if (strrchr(new_name, '.') != NULL) {
    char * p0;
    p0 = strrchr(new_name, '.');
    strcpy(line, p0);
    strcpy(p0, ".brief");
    strcat(new_name, line);
    //    printf("%s\n", new_name);
    if ((strstr(new_name, ".HTM") == NULL) && (strstr(new_name, ".htm") == NULL)) {
   strcpy(line, ".htm");
   strcat(new_name, line);
    }
  } else {
    strcat(new_name, ".brief.htm");
  }
  //
  // End :: decide the name for output file
  //

  //
  // Begin :: check the existence of utility program REPLACE.EXE
  //
  if ((inf = fopen("replace.exe", "r")) == NULL)
    {
      puts(" ");
      puts("REPLACE.EXE not found.");
      puts(" ");
      exit(0);
    }
  fclose(inf);
  //
  // End :: check the existence of utility program REPLACE.EXE
  //

  /* change one phrase in temporary file */
  sprintf(command, "replace %s /ltd  /j/ltd ", argv[1]);
  system(command);

  /* change one phrase in temporary file */
  sprintf(command, "replace %s /ltable  /j/ltable ", argv[1]);
  system(command);

  /* change one phrase in temporary file */
  sprintf(command, "replace %s /ltitle  /j/ltitle ", argv[1]);
  system(command);

  /* change one phrase in temporary file */
  sprintf(command, "replace %s /lcenter  /j/lcenter ", argv[1]);
  system(command);

  /* reopen input file */
  if ((inf = fopen(argv[1], "r")) == NULL)
    {
      puts(" ");
      puts("Re open input file failed.");
      puts(" ");
      exit(0);
    }

  /* open output file */
  if ((outf = fopen(new_name, "w")) == NULL)
    {
      puts(" ");
      puts("Open output file failed.");
      puts(" ");
      exit(0);
    } else {
       fprintf(outf, "<html>\n");
    }

  //
  // Begin :: MAIN LOOP - LINE BY LINE PROCESS INPUT FILE
  //
  while (1) {   
    if (feof(inf)) goto END_READ_FILE;
    fggets(line, 5000, inf);

    if (strstr(line, "Australian Markets") != NULL) {
                 in_content = 1;
    }

    if (strstr(line, "U.S. Markets") != NULL) {
                 in_content = 1;
    }

    if (strstr(line, "Nasdaq quotes delayed") != NULL) {
                 in_content = 0;
                 fprintf(outf, "
\n"); } if (strstr(line, "Add to Portfolio") != NULL) { in_content = 0; fprintf(outf, "\n"); } if (strstr(line, "") != NULL) { if (strstr(line, "") != NULL) { fprintf(outf, "%s\n", line); } else { fprintf(outf, "<title>%s\n", line); } fprintf(outf, "<pre class="prettyprint"><code>\n");<br />    } <br /><br />    if (in_content) { <br />         untagBuf(line); <br />         if ((nonBlankChar(line) > 3) && (CharCount(line, ',') < 12) &&<br />     (strstr(line, "Historical Quotes:") == NULL) &&<br />             (CharCount(line, '+') < 12) && (CharCount(line, '\%') < 12)) { <br /><br />     if ((strstr(line, "Last Trade") != NULL) || (strstr(line, "Index Value") != NULL)) {  <br /> //  strcpy(prev_line, "");<br /> strcpy(high_price, "");<br /> strcpy(low_price, "");<br /> strcpy(daily_volume, "");<br /><br />                      fprintf(outf, "</div></div></div><pre>\n");<br />     }<br /><br /> //<br /> // Begin : record highest price & lowest price of the day <br /> //         in Australian styled quote HTML files<br /> //<br />     if ((strstr(line, "Day's Range") != NULL)  && (strstr(line, "-") != NULL) )  {<br /> char * dash;<br /><br /> strcpy(low_price, line);<br /><br /> dash = strstr(line, "-");<br /> if (dash != NULL) {<br /> dash++;<br /> strcpy(high_price, dash);<br /><br /> dash = strstr(low_price, "-");<br /> if (dash != NULL) { dash[0] = 0x0; }<br /><br /> numericString(high_price, line_broken);<br /> numericString(low_price, line_broken);<br /> }<br />                      <br />     }<br /> //<br /> // End : record highest price & lowest price of the day<br /> //         in Australian styled quote HTML files<br /> //<br /><br /> //<br /> // Begin : record highest price & lowest price of the day <br /> //         in American styled quote HTML files<br /> //<br />     if ((strstr(prev_line, "Day's Range") != NULL) && <br />         (strstr(line, "Bid") == NULL) && (strstr(line, "Ask") == NULL) ) {<br /> char * dash;<br />                   //     
fprintf(outf, " :: daily range found ::");<br /><br /> strcpy(low_price, line);<br /><br /> dash = strstr(line, "-");<br /> if (dash != NULL) {<br /> dash++;<br /> strcpy(high_price, dash);<br /><br /> dash = strstr(low_price, "-");<br /> if (dash != NULL) { dash[0] = 0x0; }<br /><br /> numericString(high_price, line_broken);<br /> numericString(low_price, line_broken); <br /> }<br />                      <br />     }<br /> //<br /> // End : record highest price & lowest price of the day<br /> //         in American styled quote HTML files<br /> //<br /><br /> //<br /> // Begin : record trade volume of the day<br /> //         in Australian styled quote HTML files<br /> //<br />     if (strstr(line, "Volume") != NULL)  {<br /> strcpy(daily_volume, line); <br /> numericString(daily_volume, 0);<br />     }<br /> //<br /> // End : record trade volume of the day<br /> //         in Australian styled quote HTML files<br /> //<br /><br /> //<br /> // Begin : record trade volume of the day<br /> //         in American styled quote HTML files<br /> // <br />     if ((strstr(prev_line, "Volume") != NULL) &&<br />             (strstr(line, "Div") == NULL) && (strstr(line, "Date") == NULL)) {<br /> // <br /> strcpy(daily_volume, line); <br /> numericString(daily_volume, 0);<br /><br />             //      fprintf(outf, " VOLUME VOLUME $$$$ ");<br /><br />     }<br /> //<br /> // End : record trade volume of the day<br /> //         in American styled quote HTML files<br /> //<br /><br /> //<br /> // Begin : centralize the line for ASX stock symbols and remove a few<br /> // blank spaces before the close bracket !!<br /> //<br />     if (strstr(line, "(ASX:") != NULL)  {<br />                      fprintf(outf, "</div></div></div></code></pre><center><p>______________________</p><p><big><font color="darkblue">\n"); if (strstr(line, ")") != NULL) { char * p8; p8 = strstr(line, ")"); while (p8 > line) { p8--; if (p8[0] > 0x20) { break; } } p8++; p8[0] = ')'; p8++; p8[0] = 0x0; } 
} // // End : centralize the line for ASX stock symbols and remove blank spaces // if (strstr(line, "Basic Chart") != NULL) { fprintf(outf, "<pre class="prettyprint"><code>\n");<br />     }<br /><br />     if ((strstr(line, "Symbol") != NULL) ||<br />        (strstr(line, "Message Board") != NULL) ||<br />        (strstr(line, "INDU AVERAGE") != NULL) ||<br />        (strstr(line, "INTEL CORP") != NULL) ||<br />        (strstr(line, "S\&amp;P 500 INDEX") != NULL) || <br />        (strstr(line, "INTL BUS MACHINE") != NULL))  {<br />                      fprintf(outf, "<P><center><big>______________________</big></center><P>\n");<br />     }   <br /><br />     if (strstr(line, "Splits:") != NULL)  {<br />                      fprintf(outf, "</code></pre><small><small>\n"); } line_broken = 0; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// if (strstr(line, "Australian Markets") != NULL) { char * p0; fprintf(outf, "<center><h3><font color="darkred">"); p0 = strstr(line, "Australian Markets"); // effectively breaking the text line p0--; p0[0] = 0x0; fprintf(outf, "%s", line); fprintf(outf, "</font></h3></center>"); fprintf(outf, "<br />"); fprintf(outf, "<center><h3><font color="darkred">"); p0++; fprintf(outf, "%s", p0); fprintf(outf, "</font></h3></center>\n"); line_broken = 1; } else { if (strstr(line, "U.S. Markets") != NULL) { char * p0; fprintf(outf, "<center><h3><font color="darkred">"); p0 = strstr(line, "U.S. 
Markets"); // effectively breaking the text line p0--; p0[0] = 0x0; fprintf(outf, "%s", line); fprintf(outf, "</font></h3></center>"); fprintf(outf, "<br />"); fprintf(outf, "<center><h3><font color="darkred">"); p0++; fprintf(outf, "%s", p0); fprintf(outf, "</font></h3></center>\n"); line_broken = 1; } else { if ((strstr(line, "dyuan71") == NULL) && (strstr(line, " != ") == NULL) && (strstr(line, " == ") == NULL) && (strstr(line, "//--") == NULL) && (strstr(line, "// --") == NULL) && (strstr(line, "Type:") == NULL) && (strstr(line, "Portfolio") == NULL) && (strstr(line, "Size:") == NULL) && (strstr(line, "! View") == NULL) && (strstr(line, "cellspacing") == NULL) && (strstr(line, "cellpadding") == NULL) && (strstr(line, "window.open") == NULL) && (strstr(line, "window.name") == NULL) && (strstr(line, "remote.opener") == NULL) && (strstr(line, "location.href") == NULL) && (strstr(line, "Trades Online") == NULL) && (strstr(line, "Open an account") == NULL) && (strstr(line, "PayDirect") == NULL) && (strstr(line, "Sign Out") == NULL) && (strstr(line, "ADVERTISEMENT") == NULL) && (strstr(line, "Compare:") == NULL) && (strstr(line, "Scale:") == NULL) && (strstr(line, "Real-Time Quotes") == NULL) && (strstr(line, "Moving Average") == NULL) && (! ((nonBlankChar(line) == 18) && (strstr(line, "Nasdaq") != NULL)) ) && (! ((nonBlankChar(line) == 15) && (strstr(line, "Dow") != NULL)) ) && (! ((strstr(line, "Linear") != NULL) && (strstr(line, "Log") != NULL)) ) && (! ((strstr(line, "Quotes") != NULL) && (strstr(line, "Oz 1") != NULL)) ) && (! ((strstr(line, "Quotes") != NULL) && (strstr(line, "OZ1") != NULL)) ) && (! ((strstr(line, "e.g.") != NULL) && (strstr(line, "YHOO") != NULL)) ) && (! ((strstr(line, "commision") != NULL) && (strstr(line, "FREE") != NULL)) ) && (! ((strstr(line, "Ameritrade") != NULL) && (strstr(line, "Streamer") != NULL)) ) && (! ((strstr(line, "Enter") != NULL) && (strstr(line, "Symbol") != NULL)) ) && (! 
((strstr(line, "Money") != NULL) && (strstr(line, "Manager") != NULL)) ) && (! ((strstr(line, "Customize") != NULL) && (strstr(line, "Finance") != NULL)) ) && (! ((strstr(line, "vs") != NULL) && (strstr(line, "S\&P") != NULL)) ) && (! ((strstr(line, "var") != NULL) && (strstr(line, "=") != NULL)) ) && (! ((strstr(line, "Bar") != NULL) && (strstr(line, "Line") != NULL)) ) && (! ((strstr(line, "Message") != NULL) && (strstr(line, "Board") != NULL)) ) && (! ((strstr(line, "Info") != NULL) && (strstr(line, "~") != NULL)) ) && (! ((strstr(line, "Bill") != NULL) && (strstr(line, "Pay") != NULL)) ) && (! ((strstr(line, "Funds") != NULL) && (strstr(line, "Transfer") != NULL)) ) && (! ((strstr(line, "Accounts") != NULL) && (strstr(line, "manage") != NULL)) ) && (! ((strstr(line, "Expense") != NULL) && (strstr(line, "Mgr") != NULL)) ) && (! ((strstr(line, "Info") != NULL) && (strstr(line, "Quotes") != NULL)) ) && (! ((strstr(line, "Info") != NULL) && (strstr(line, "Profile") != NULL)) ) && (! ((strstr(line, "Statistics") != NULL) && (strstr(line, "Profile") != NULL)) ) && (! ((strstr(line, "Info") != NULL) && (strstr(line, "News") != NULL)) ) && (! ((strstr(line, "Info") != NULL) && (strstr(line, "Research") != NULL)) ) && (! ((strstr(line, "-1") != NULL) && (strstr(line, "Finance") != NULL)) ) && (! ((strstr(line, "~") != NULL) && (strstr(line, " Bid ") != NULL)) ) && (! ((strstr(line, "~") != NULL) && (strstr(line, " Ask ") != NULL)) ) && (! ((strstr(line, "manage") != NULL) && (strstr(line, "create") != NULL)) ) && (! ((strstr(line, "p=m") != NULL) && (strstr(line, "max") != NULL)) ) && (! ((strstr(line, "z=m") != NULL) && (strstr(line, "max") != NULL)) ) && (! ((strstr(line, "Finance") != NULL) && (strstr(line, "\"") != NULL)) ) && (! ((strstr(line, "function") != NULL) && (strstr(line, "(") != NULL) && (strstr(line, ")") != NULL)) ) && (! ((strstr(line, "is no") != NULL) && (strstr(line, "longer") != NULL) && (strstr(line, "valid") != NULL)) ) && (! 
((strstr(line, "Basic") != NULL) && (strstr(line, "Chart") != NULL)) ) && (! ((strstr(line, "6m") != NULL) && (strstr(line, "3m") != NULL)) ) && (! ((strstr(line, "1y") != NULL) && (strstr(line, "2y") != NULL)) ) && (strstr(line, "\&p=m50,m200\&t=5y\&l=on\&z=m\&q=l") == NULL) && (strstr(line, "Components") == NULL)) { ////////////////////////////////////////////////// pl = strstr(line, "Lookup") ; if (pl != NULL) { pl[0] = 0x0; } ////////////////////////////////////////////////// pl = strstr(line, "Look Up") ; if (pl != NULL) { pl[0] = 0x0; } ////////////////////////////////////////////////// rTrim(line); lTrim(line); ////////////////////////////////////////////////// pc = strstr(line, "Change") ; if (pc != NULL) { if (strstr(line, " ") != NULL) { packString(line, 10); } } ////////////////////////////////////////////////// if (isTradeTerm(line) == 1) { fprintf(outf, "%s ", line); } else { fprintf(outf, "%s\n", line); } ////////////////////////////////////////////////// // fprintf(outf, "%d\n", nonBlankChar(line)); ////////////////////////////////////////////////// } line_broken = 0; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // if (strlen(line) > 0) { // strcpy(prev_line, line); // } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// if (line_broken == 1) { fprintf(outf, "</small></small></font></big></p></center><div style="\"display:none\"">"); } if (strstr(line, "(ASX:") != NULL) { fprintf(outf, "<pre class="prettyprint"><code><div style="display:block">");<br />     }<br /><br />     if (strstr(line, "Splits:") != NULL)  {<br />                      fprintf(outf, "</small></small><pre>");<br />     }   <br /><br />     if (strstr(line, "Key Statistic") != NULL) {<br />                      fprintf(outf, "</center></center></big></big></font></font></code></pre>"); fprintf(outf, 
"<p></p><center><big>______________________</big></center><p>\n"); } if (strstr(line, "No such ticker") != NULL) { fprintf(outf, ""); fprintf(outf, "</p><p></p><center><big>______________________</big></center><p>\n"); } ////////////////////////////////////////////////// if ((strstr(line, "Yield") != NULL) && (strstr(line, "Div") == NULL) ) { // fprintf(outf, "HI \> \> %s\nLO \> \> %s\nVO \> \> %s\n \%\%\%\%\% Yield Yield \%\%\%\%\% ", high_price, low_price, daily_volume); if ((strlen(high_price) > 0) && (strlen(low_price) > 0) && (strlen(daily_volume) > 0)) { // fprintf(outf, "%s ", line); mid = ( atof(high_price) + atof(low_price) ) / 2.0; vol = atof(daily_volume); sum = mid * vol; if (sum 1.0E+9) { sum /= (1.0E+9); fprintf(outf, "Est Value %5.2f Bln \n", sum); } else { if (sum > 1.0E+6) { sum /= (1.0E+6); fprintf(outf, "Est Value %5.2f Mln \n", sum); } else { if (sum > 1.0E+3) { sum /= (1.0E+3); fprintf(outf, "Est Value %5.2f K \n", sum); } else { fprintf(outf, "Est Value %5.2f Only \n", sum); } } } } } } ////////////////////////////////////////////////// // fprintf(outf, "Est Value %5.2f Only \n", sum); ////////////////////////////////////////////////// ////////////////////////////////////////////////// if ((strstr(prev_line, "Div") != NULL) && (strstr(prev_line, "Yield") != NULL)) { if (strlen(line) > 0) { char * p10; char * p11; char * p12; p10= strstr(line, "-"); p11= strstr(line, "("); p12= strstr(line, ")"); // fprintf(outf, "HI \> \> %s\nLO \> \> %s\nVO \> \> %s\n \%\%\%\%\% Yield Yield \%\%\%\%\% ", high_price, low_price, daily_volume); if ((p10 != NULL) || (p11 != NULL) || (p12 != NULL)) { if ((strlen(high_price) > 0) && (strlen(low_price) > 0) && (strlen(daily_volume) > 0)) { // fprintf(outf, "%s ", line); mid = ( atof(high_price) + atof(low_price) ) / 2.0; vol = atof(daily_volume); sum = mid * vol; if (sum 1.0E+9) { sum /= (1.0E+9); fprintf(outf, "Est Value %5.2f Bln \n", sum); } else { if (sum > 1.0E+6) { sum /= (1.0E+6); fprintf(outf, "Est Value 
%5.2f Mln \n", sum); } else { if (sum > 1.0E+3) { sum /= (1.0E+3); fprintf(outf, "Est Value %5.2f K \n", sum); } else { fprintf(outf, "Est Value %5.2f Only \n", sum); } } } } } } } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// if (strlen(line) > 0) { strcpy(prev_line, line); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// } } } // // End :: MAIN LOOP - LINE BY LINE PROCESS INPUT FILE // END_READ_FILE: fclose(inf); fprintf(outf, "</p></div>

Project Homepage: