/*
 * Program: getlinks.c
 * Purpose: extract only the links from html files.
 *
 * This program opens the html file of your choice and prints a list of
 * the links found in that file.
 *
 * Note: prompts and status messages are written to stderr so that they
 * do not end up in redirected output. Example:
 *
 *     getlinks > links.txt
 *
 * sends the links to links.txt while the prompts still appear on the
 * terminal. By default the links are written to the screen (stdout).
 *
 * You are free to use this code in any way that you wish.
 *
 * This is a modified version of the original getlinks.c (2000). The
 * original header asked that author information be removed from modified
 * copies, so it has been removed here.
 *
 * Known limitations:
 *   - only the literal prefix "http:" is recognised as the start of a link;
 *   - a link ends at the first space, double quote, newline or end of
 *     buffer;
 *   - links longer than MAX_LINK - 1 characters are truncated;
 *   - input lines longer than MAX_LINE - 1 characters are read in pieces,
 *     so a link that straddles two pieces is reported in two parts or
 *     missed.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINE 6000     /* size of the buffer used to read one input line */
#define MAX_LINK 500      /* size of the buffer holding one extracted link  */
#define MAX_FILENAME 200  /* size of the buffer holding the file name       */

int find_links(void);

FILE *fp;              /* input file; opened by main(), read by find_links() */
int link_counter = 0;  /* running total of links printed so far              */

/*
 * Prompt for a file name on stderr, open that file, print every link found
 * in it to stdout and report the total on stderr.
 *
 * Returns EXIT_SUCCESS when the file was processed, EXIT_FAILURE when no
 * usable file name was entered or the file could not be opened.
 */
int main(void)
{
    char filename[MAX_FILENAME];
    size_t name_len;

    fprintf(stderr, "\ngetlinks program: extract links from html files.");
    fprintf(stderr, "\nName of html file to open: ");

    /* fgets() bounds the read to the buffer, unlike the removed gets(). */
    if (fgets(filename, sizeof filename, stdin) == NULL) {
        fprintf(stderr, "\nNo file name was entered.\n");
        return EXIT_FAILURE;
    }

    /*
     * A completely filled buffer without a newline means the name did not
     * fit; refuse it rather than opening a truncated path.
     */
    name_len = strlen(filename);
    if (name_len == sizeof filename - 1 && filename[name_len - 1] != '\n') {
        fprintf(stderr, "\nThe file name is too long.\n");
        return EXIT_FAILURE;
    }

    /* fgets() keeps the line terminator; cut the name off at it. */
    filename[strcspn(filename, "\r\n")] = '\0';

    fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "\nThe file could not be opened. ");
        fprintf(stderr, "\nCheck spelling and include path if necessary.\n");
        fflush(stdout);
        return EXIT_FAILURE;
    }

    find_links();

    fprintf(stderr, "\n\nReached end of file. Found %d links.\n", link_counter);
    fclose(fp);
    return EXIT_SUCCESS;
}

/* Return nonzero when c terminates a link (or the buffer itself). */
static int is_link_end(char c)
{
    return c == '\0' || c == '\n' || c == ' ' || c == '"';
}

/*
 * Copy the link starting at src into dst, storing at most dst_size - 1
 * characters followed by a terminating '\0'.
 *
 * Returns the number of characters the link occupies in src. This can be
 * larger than what was stored when the link had to be truncated, which
 * lets the caller resume scanning after the whole link.
 */
static size_t copy_link(const char *src, char *dst, size_t dst_size)
{
    size_t consumed = 0;
    size_t stored = 0;

    while (!is_link_end(src[consumed])) {
        if (stored + 1 < dst_size) {
            dst[stored] = src[consumed];
            stored++;
        }
        consumed++;
    }
    if (dst_size > 0) {
        dst[stored] = '\0';
    }
    return consumed;
}

/*
 * Read the global stream fp line by line and print every link that starts
 * with "http:" to stdout, each one preceded by a newline. The global
 * link_counter is incremented once per printed link.
 *
 * Returns the number of links printed by this call, or 0 when fp is NULL.
 */
int find_links(void)
{
    char line_buffer[MAX_LINE];
    char link_buffer[MAX_LINK];
    int found = 0;

    if (fp == NULL) {
        return 0;
    }

    while (fgets(line_buffer, sizeof line_buffer, fp) != NULL) {
        const char *cursor = line_buffer;
        const char *hit;

        /* Keep searching the same line so no link on it is skipped. */
        while ((hit = strstr(cursor, "http:")) != NULL) {
            size_t span = copy_link(hit, link_buffer, sizeof link_buffer);

            link_counter++;
            found++;
            printf("\n%s", link_buffer);

            /* "http:" holds no terminator, so span >= 5 and we advance. */
            cursor = hit + span;
        }
        fflush(stdout);
    }
    return found;
}