john pfeiffer
  • Home
  • Categories
  • Tags
  • Archives

mysitemap

/* john pfeiffer 29feb08    convert the output from extract-links.c into sitemap.xml

<a href='displays_about.asp'
<a href="/pro/index.html" 
INTO
<url><loc>http://anders.co.uk/</loc></url>
NOTE: the output requires manual editing afterwards; this program just speeds things up
(e.g. fix up entries such as http://website.com//pro or http://website.com//website.com)
*/

#include <stdio.h>
#include <stdlib.h>
#define CMDLINE_PARAMETERS 2   /* The first parameter is always the program name */
void test_commandline_parameters( int argc, char* argv[] );

/*
 * Called right after a '<' has been read from `in`.  Consumes the following
 * characters while they spell "a href" (letters in either case, exactly one
 * space between the 'a' and the 'h').
 *
 * Returns 1 when the whole "a href" was matched, 0 otherwise.  On a mismatch
 * the offending character is pushed back so the caller examines it again;
 * that way a '<' which interrupts a partial match can still start a link.
 */
static int match_anchor_href( FILE* in )
{
    static const char lower[] = "a href";
    static const char upper[] = "A HREF";
    int i;
    int c;

    for( i = 0; lower[i] != '\0'; i++ )
    {
        c = getc(in);
        if( c != lower[i] && c != upper[i] )
        {
            if( c != EOF )
            {
                ungetc(c, in);
            }
            return 0;
        }
    }
    return 1;
}

/*
 * Writes one sitemap entry to `out`: the fixed "<url><loc>http://anders.co.uk/"
 * prefix, then every character read from `in` up to (not including) the
 * closing `quote` character, then "</loc></url>" and a newline.
 *
 * Returns 1 on success, 0 if the input ended before the closing quote was
 * found (in which case the entry is left unterminated in the output).
 */
static int copy_link_target( FILE* in, FILE* out, int quote )
{
    int c;

    fputs("<url><loc>http://anders.co.uk/", out);

    /* Test before writing so an empty href='' produces an empty path
       instead of emitting the closing quote itself. */
    while( (c = getc(in)) != EOF && c != quote )
    {
        putc(c, out);
    }
    if( c == EOF )
    {
        return 0;
    }

    fputs("</loc></url>\n", out);
    return 1;
}

/*
 * Reads the file named by argv[1], looks for every  <a href='...'  or
 * <a href="..."  and writes one  <url><loc>http://anders.co.uk/...</loc></url>
 * line per link into sitemap.xml in the current directory.
 *
 * Exits with status 1 when a file cannot be opened, when the character after
 * "href=" is not a quote, when a link is missing its closing quote, or when
 * the output cannot be flushed.  Returns 0 after printing the link count.
 */
int main(int argc, char* argv[]){

    FILE* fptr;
    FILE* outputptr;

    int c;                      /* int, not char: must be able to hold EOF */
    int linkcounter = 0;

    test_commandline_parameters( argc, argv );  /* to ensure we have a filename */

    /* Open the input first so a bad filename does not truncate an
       existing sitemap.xml. */
    fptr = fopen(argv[1], "r");
    if(fptr == NULL)
    {   printf("error opening input file!");
        exit(1);
    }

    outputptr = fopen("sitemap.xml", "w");
    if(outputptr == NULL)
    {   printf("error opening output file!");
        fclose(fptr);
        exit(1);
    }

    while( (c = getc(fptr)) != EOF )
    {
        if( c != '<' || !match_anchor_href(fptr) )
        {
            continue;           /* not the start of a link */
        }

        getc(fptr);             /* skip the character after href (expected '='; not validated) */
        c = getc(fptr);         /* opening quote: " or ' */
        if( c != '\'' && c != '\"' )
        {   printf("input error after href\n");
            exit(1);
        }

        /* The link runs up to the matching quote of the same kind. */
        if( !copy_link_target(fptr, outputptr, c) )
        {   printf("input error: link is missing its closing quote\n");
            exit(1);
        }
        linkcounter++;
    }

    fclose(fptr);
    if( fclose(outputptr) != 0 )    /* fclose flushes; a failure means lost output */
    {   printf("error writing output file!");
        exit(1);
    }
    printf("%d links converted",linkcounter);

    return 0;
}/*end of main*/

/*
 * Validates the number of command line arguments.
 *
 * Exactly CMDLINE_PARAMETERS arguments (program name included) are required.
 * With too few or too many, a message is printed and the program exits with
 * status 1.  Otherwise every argument is echoed, separated by spaces, followed
 * by "in progress..." and the function returns.
 */
void test_commandline_parameters( int argc, char* argv[] )
{
    int arg;

    if( argc < CMDLINE_PARAMETERS )
    {
        printf("\nMore parameters needed, e.g. %s infile.txt\n", argv[0]);
        exit(1);
    }

    if( argc > CMDLINE_PARAMETERS )
    {
        printf("\nToo many command line parameters, e.g.\n");
        exit(1);
    }

    /* Argument count is correct: echo the command line as a progress banner. */
    for( arg = 0; arg < argc; arg++ )
    {
        printf("%s ", argv[arg]);
    }
    printf("in progress...\n\n");
}/* end test_commandline_parameters func */

  • « file extract links
  • httpgetV4 1 »

Published

Feb 29, 2008

Category

c

~301 words

Tags

  • c 95
  • mysitemap 1