/* john pfeiffer 29feb08
Converts the output from extract-links.c into sitemap.xml.
Input anchors such as
<a href='displays_about.asp'
<a href="/pro/index.html"
are turned INTO entries of the form
<url><loc>http://anders.co.uk/</loc></url>
with the href value appended after the site prefix.
NOTE: the result requires manual editing afterwards, this just speeds things up
(e.g. to repair http://website.com//pro or http://website.com//website.com)
*/
#include <stdio.h>
#include <stdlib.h>
#define CMDLINE_PARAMETERS 2 /* The first parameter is always the program name */
void test_commandline_parameters( int argc, char* argv[] );
/*
 * Called right after a '<' has been read: tries to consume the rest of the
 * anchor opening "a href" (letters matched case-insensitively, exactly one
 * space between the tag name and the attribute).
 *
 * Returns 1 when the whole pattern was consumed. On a mismatch returns 0 and
 * pushes the offending character back, so the caller rescans it; this matters
 * when that character is the '<' that opens the next anchor.
 */
static int match_anchor_href(FILE *in)
{
    static const char lower[] = "a href";
    static const char upper[] = "A HREF";
    size_t i;

    for (i = 0; lower[i] != '\0'; i++) {
        int c = getc(in);

        if (c != lower[i] && c != upper[i]) {
            ungetc(c, in); /* pushing back EOF is a harmless no-op */
            return 0;
        }
    }
    return 1;
}

/*
 * Copies characters from in to out up to (not including) the closing quote.
 * Returns 1 when the closing quote was found, 0 when the input ended first.
 */
static int copy_quoted_value(FILE *in, FILE *out, int quote)
{
    int c;

    while ((c = getc(in)) != EOF) {
        if (c == quote)
            return 1;
        putc(c, out);
    }
    return 0;
}

/*
 * Reads the file named by argv[1], and for every `<a href='...'` or
 * `<a href="..."` found writes one `<url><loc>...</loc></url>` line to
 * sitemap.xml in the current directory, prefixing the href value with the
 * site address. Prints the number of converted links when done.
 *
 * Exits with status 1 if a file cannot be opened or written, or if the
 * character that should open the href value is not a quote.
 */
int main(int argc, char* argv[]){
    static const char entry_open[] = "<url><loc>http://anders.co.uk/";
    static const char entry_close[] = "</loc></url>\n";
    FILE *fptr;
    FILE *outputptr;
    int c; /* int, not char: getc returns EOF outside the range of char */
    int linkcounter = 0;

    test_commandline_parameters( argc, argv ); /* to ensure we have a filename */

    /* Open the input first so a bad filename does not truncate sitemap.xml. */
    fptr = fopen(argv[1], "r");
    if (fptr == NULL) {
        printf("error opening input file!");
        exit(1);
    }
    outputptr = fopen("sitemap.xml", "w");
    if (outputptr == NULL) {
        printf("error opening output file!");
        fclose(fptr);
        exit(1);
    }

    while ((c = getc(fptr)) != EOF) {
        int quote;

        if (c != '<' || !match_anchor_href(fptr))
            continue;

        getc(fptr);         /* skip the character after "href", expected to be '=' */
        quote = getc(fptr); /* " or ' */
        if (quote != '\'' && quote != '\"') {
            printf("input error after href\n");
            exit(1); /* exit() flushes and closes both streams */
        }

        fputs(entry_open, outputptr);
        if (!copy_quoted_value(fptr, outputptr, quote))
            printf("warning: input ended inside an href value\n");
        /* The entry is closed even for a truncated value so the output stays balanced. */
        fputs(entry_close, outputptr);
        linkcounter++;
    }

    fclose(fptr);
    /* fclose flushes buffered output, so this is where a write failure shows up. */
    if (fclose(outputptr) != 0) {
        printf("error writing output file!");
        exit(1);
    }
    printf("%d links converted",linkcounter);
    return 0;
}/*end of main*/
/*
 * Checks that the program was started with exactly CMDLINE_PARAMETERS
 * arguments (the program name counts as one).
 *
 * argc, argv: the values received by main.
 *
 * With too few or too many arguments a usage hint is printed and the program
 * exits with status 1. Otherwise the command line is echoed followed by
 * "in progress..." and the function returns.
 */
void test_commandline_parameters( int argc, char* argv[] )
{
    /* argv[0] is NULL when argc is 0; never hand NULL to %s. */
    const char *program = (argc > 0 && argv[0] != NULL) ? argv[0] : "program";
    int i;

    if (argc < CMDLINE_PARAMETERS) {
        printf("\nMore parameters needed, e.g. %s infile.txt\n", program);
        exit(1);
    }
    if (argc > CMDLINE_PARAMETERS) {
        printf("\nToo many command line parameters, e.g. %s infile.txt\n", program);
        exit(1);
    }

    for (i = 0; i < argc; i++)
        printf("%s ", argv[i]);
    printf("in progress...\n\n");
}/* end test_commandline_parameters func */