john pfeiffer
  • Home
  • Categories
  • Tags
  • Archives

file extract links

/* john pfeiffer 29feb08    append links from htm(l) file(s) to combined-links.txt -> combined-links.txt
based on 'exportedoperatotext.c 11jan05' & 'link counter' 2004

uses a batch file:
    echo Batch file to combine all of the bookmarks in this folder using extract-links.c
    FOR %%a IN (*.htm) do extract-links.c.exe %%a 
    rename combined-links.txt combined-links.htm

*/

#include <stdio.h>
#include <stdlib.h>
#define CMDLINE_PARAMETERS 2   /* The first parameter is always the program name */

void test_commandline_parameters( int argc, char* argv[] );

/* Writes c to out, then keeps copying characters from in to out until the
   next character read is '>' or end of input.  That final character is NOT
   written; it is returned so the caller can tell the two cases apart. */
static int copy_until_close_bracket(FILE* in, FILE* out, int c)
{
    do {
        putc(c, out);
        c = getc(in);
    } while (c != '>' && c != EOF);

    return c;
}

/* Scans in for anchors that start with "<a href" (each letter may be upper
   or lower case) and copies every one of them to out, from the '<' through
   the second '>' that follows it (so the link text and, for the usual
   "<a href=...>text</a>" form, the closing tag are included).  Each copied
   link is followed by "<br>" and a newline.
   Returns the number of complete links written. */
static int extract_links(FILE* in, FILE* out)
{
    static const char lower[] = "<a href";
    static const char upper[] = "<A HREF";
    const size_t prefix_len = sizeof lower - 1;
    char matched[sizeof lower];     /* the characters as they appeared in the input */
    size_t n;
    int c;                          /* int, not char: getc() returns EOF out of band */
    int links = 0;

    while ((c = getc(in)) != EOF) {
        if (c != '<')
            continue;

        /* try to match the rest of the prefix, remembering the original case */
        matched[0] = (char)c;
        n = 1;
        while (n < prefix_len) {
            c = getc(in);
            if (c != lower[n] && c != upper[n])
                break;              /* also taken when c is EOF */
            matched[n++] = (char)c;
        }

        if (n < prefix_len) {
            if (c == EOF)
                break;
            /* The mismatching character may itself be the '<' that opens a
               real anchor (e.g. "<<a href"), so hand it back to the stream
               instead of dropping it.  One character of pushback is
               guaranteed. */
            ungetc(c, in);
            continue;
        }

        /* c still holds the final 'f'/'F'; write what precedes it, then let
           the copy helper write the 'f' and everything up to the first '>' */
        fwrite(matched, 1, prefix_len - 1, out);
        c = copy_until_close_bracket(in, out, c);

        /* continue through the second '>' so the link text is taken too */
        if (c != EOF)
            c = copy_until_close_bracket(in, out, c);

        if (c == EOF) {
            /* input ended inside an anchor: end the partial line so later
               appends start on a fresh one, and do not count this link */
            putc('\n', out);
            break;
        }

        putc(c, out);               /* the second '>' itself */
        fputs("<br>\n", out);       /* put each link on its own line */
        links++;
    }

    return links;
}

/* Entry point.  argv[1] names the htm(l) file to read; every link found in
   it is appended to "combined-links.txt" in the current directory and the
   number of links is printed.  Exits with status 1 if the command line is
   wrong or a file cannot be opened or written. */
int main(int argc, char* argv[]){

    FILE* fptr;
    FILE* outputptr;
    int linkcounter;

    test_commandline_parameters( argc, argv );  /* to ensure we have a filename */

    /* check the input first so that a bad filename does not create
       (or leave open) the output file */
    fptr = fopen(argv[1], "r");
    if(fptr == NULL)
    {   printf("error opening input file!");
        exit(1);
    }

    outputptr = fopen("combined-links.txt", "a+");
    if(outputptr == NULL)
    {   printf("error opening output file!");
        fclose(fptr);
        exit(1);
    }

    linkcounter = extract_links(fptr, outputptr);

    fclose(fptr);
    if(fclose(outputptr) != 0)      /* fclose flushes; a failure here means lost output */
    {   printf("error writing output file!");
        exit(1);
    }
    printf("%d links extracted",linkcounter);

    return 0;
}/*end of main*/

/* Validates the argument count against CMDLINE_PARAMETERS.
   Too few or too many arguments: prints a message and exits with status 1.
   Exactly the expected count: echoes every argument (program name included)
   followed by "in progress..." and returns to the caller. */
void test_commandline_parameters( int argc, char* argv[] )
{
    int arg;

    if (argc < CMDLINE_PARAMETERS) {
        printf("\nMore parameters needed, e.g. %s infile.txt\n", argv[0]);
        exit(1);
    }

    if (argc > CMDLINE_PARAMETERS) {
        fputs("\nToo many command line parameters, e.g.\n", stdout);
        exit(1);
    }

    /* argument count is exactly right: show what we are about to process */
    for (arg = 0; arg < argc; arg++) {
        printf("%s ", argv[arg]);
    }
    fputs("in progress...\n\n", stdout);
}/* end test_commandline_parameters func */

  • « file extract non html
  • mysitemap »

Published

Feb 29, 2008

Category

c

~349 words

Tags

  • c 95
  • extract 6
  • file 92
  • links 4