Итак, что такое программа? Программа — это алгоритм, записанный на языке программирования, то есть последовательность инструкций для выполнения определённых операций.
Программы создаются очень легко. Можно писать программы на PHP:
// Excerpt: selects every row of `parser_products` through the $conn link and
// starts iterating over the result set with mysql_fetch_array().
// For each row the loop head shown here:
//   - resets $big_path, $small_path and $additional_pic to false and
//     $additional_pic_cntr to 0;
//   - builds $prod_name as "<name> (<model>)";
//   - copies the row's id, picture and url into $arr_param[0..2];
//   - keeps the row id in $param_id.
// The while block is not closed in this excerpt; the rest of the body is not shown.
// NOTE(review): the mysql_* functions were deprecated in PHP 5.5 and removed in
// PHP 7.0, so this fragment only runs on older PHP versions.
$result = mysql_query("SELECT * FROM `parser_products`", $conn); while ($row = mysql_fetch_array($result)) { $big_path = false; $small_path = false; $additional_pic = false; $additional_pic_cntr = 0; $prod_name = $row['name']." (".$row['model'].")"; $arr_param[0] = $row['id']; $arr_param[1] = $row['picture']; $arr_param[2] = $row['url']; $param_id = $row['id'];
Или на Python:
"""Scrape NBA box scores from Yahoo! Sports for a range of dates.

The script reads one ``YYYY-MM-DD YYYY-MM-DD`` pair (start date, end date)
from ``dates.txt``, empties that file, downloads the scoreboard page of every
day in the range, follows each "Box Score" link and finally writes four CSV
files named ``home``, ``away``, ``box`` and ``players`` followed by
``<start>-<end>.txt``.

It targets Python 2: it relies on ``urllib.urlopen`` and on opening csv
files in binary mode.

Differences from the previous revision of this script:

* the day list is built from real calendar dates instead of assuming that
  every month has 31 days;
* the away team's final score column is labelled ``AT Final`` (it used to
  be a second ``HT Final``);
* rows of players who actually played are padded with an empty ``DNP``
  cell, so every player occupies the 17 columns announced by the header;
* every player page is downloaded once, and pages that do not match the
  expected layout are skipped instead of raising ``IndexError``;
* output files and HTTP responses are closed explicitly.
"""
import urllib
import csv
import re
import sys
import datetime

DATES_FILE = "dates.txt"
DATE_FORMAT = '%4d-%02d-%02d'
PLAYERS_PER_TEAM = 14

# Per-player columns, in the order the row patterns capture them.
PLAYER_COLUMNS = [
    'Name', 'Min', 'FG', 'FG Attempts', '3Pt', '3Pt Attempts', 'FT',
    'FT Attempts', 'Off', 'Reb', 'Ast', 'TO', 'Stl', 'Blk', 'PF', 'Pts',
    'DNP',
]
# Line-score columns captured for each team by TOP_SCORES_RE.
SCORE_COLUMNS = [
    '1st quarter', '2nd quarter', '3rd quarter', '4th quarter',
    '1st OT', '2nd OT', '3rd OT', 'Final',
]
# Team totals captured for each team by TOTALS_RE.
TOTAL_COLUMNS = [
    'Min', 'FG', '3Pt', 'FT', 'Off', 'Reb', 'Ast', 'TO', 'Stl', 'Blk', 'PF',
    'Team Rebounds',
]
PLAYERS_HEADER = [
    'First Name', 'Last Name', 'Position', 'Team', 'Height', 'Weight',
    'DOB', 'College', 'Draft Year',
]

SCOREBOARD_URL = 'http://sports.yahoo.com/nba/scoreboard?d='
BOXSCORE_URL = 'http://sports.yahoo.com/nba/boxscore?gid='
PLAYER_URL = 'http://sports.yahoo.com/nba/players/'

# All patterns are compiled once; the pattern texts are tied to the page
# markup and must not be reformatted.
FLAGS = re.S | re.I
DATES_RE = re.compile(r'(\d{4})-(\d{2})-(\d{2}) *(\d{4})-(\d{2})-(\d{2})', FLAGS)
GID_RE = re.compile(r'<a href="/nba/boxscore\?gid=(\d+)" class="yspmore">Box Score</a>', FLAGS)
BLOCK_RE = re.compile(r'<table[^<]+?<tr class=" *yspsctbg(.*?)</table>', FLAGS)
ROW_RE = re.compile(r'<tr class=" *ysprow.*?</tr>', FLAGS)
# "Did not play" row: the empty groups keep the stat columns blank so that
# the trailing group lands in the DNP column.
DNP_ROW_RE = re.compile(r'<a href="/nba/players/(\d+)[^>]*?>(.*?)</a>.*?colspan.*?>()()()()()()()()()()()()()()()(.*?)</td>', FLAGS)
STAT_ROW_RE = re.compile(r'<a href="/nba/players/(\d+)[^>]*?>(.*?)</a>.*?<td>([\d\:]+).*?<td>(\d+)-(\d+).*?<td>(\d+)-(\d+).*?<td>(\d+)-(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?', FLAGS)
TOP_SCORES_RE = re.compile(r'<td align="left" class="yspscores.*?([A-Z][\w\s]+)<.*?class="yspscores">(\d*)</td>.*?class="yspscores">(\d*)</td>.*?class="yspscores">(\d*)</td>.*?class="yspscores">(\d*)</td>()()().*?class="yspscores.*?(\d+)', FLAGS)
TOTALS_RE = re.compile(r'Totals.*?<td>([\d\:]+)?.*?<td>(\d+-\d+).*?<td>(\d+-\d+).*?<td>(\d+-\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?<td>(\d+).*?Team Rebounds.*?(\d+)', FLAGS)
GAME_INFO_RE = re.compile(r'Technical Fouls.*?>(.*?)<.*?Attendance:</b>(.*?)<.*?Officials:</b>(.*?)<', FLAGS)
PLAYER_RE = re.compile(r'<li class="player-name">(.*?) +(.*?)</li>.*?<li class="position">(.*?)</li>.*?<li class="team-name">.*?<a href.*?>(.*?)</a>.*?Height:</strong>(.*?)</li>.*?Weight:</strong>(.*?)</li>.*?Born:</strong>(.*?)</li>.*?College:</strong>(.*?)</li>.*?Draft:</strong>(.*?)</li>', FLAGS)


def find_date_pair(text):
    """Return the first date pair in *text* as six ints, or None.

    The tuple is (start year, month, day, end year, month, day).
    """
    match = DATES_RE.search(text)
    if match is None:
        return None
    return tuple(int(part) for part in match.groups())


def expand_dates(pair):
    """Return every day from start to end (inclusive) as formatted strings.

    *pair* is the six-int tuple produced by find_date_pair().  The result is
    empty when the start date lies after the end date.  Raises ValueError
    when either half of the pair is not a real calendar date.
    """
    start = datetime.date(*pair[:3])
    end = datetime.date(*pair[3:])
    days = []
    current = start
    while current <= end:
        days.append(DATE_FORMAT % (current.year, current.month, current.day))
        current += datetime.timedelta(days=1)
    return days


def team_header(side):
    """Return the header row of a team file; *side* is 'H' or 'A'."""
    header = []
    for number in range(1, PLAYERS_PER_TEAM + 1):
        for column in PLAYER_COLUMNS:
            header.append('%s-%s%d' % (column, side, number))
    # The trailing empty cell was part of the original header and is kept.
    header.append('')
    return header


def box_header():
    """Return the header row of the per-game ("box") file."""
    header = []
    for label, tag in (('Away Team', 'AT'), ('Home Team', 'HT')):
        header.append(label)
        header.extend('%s %s' % (tag, column) for column in SCORE_COLUMNS)
    for tag in ('AT', 'HT'):
        header.extend('%s-%s' % (column, tag) for column in TOTAL_COLUMNS)
    header.extend(['Technical Fouls', 'Attendance', 'Officials', 'URL'])
    return header


def player_row(fields):
    """Strip the captured cells and pad them to one cell per player column.

    STAT_ROW_RE captures no DNP value, so rows of players who played are one
    cell short; the padding keeps later players aligned with the header.
    """
    row = [cell.strip() for cell in fields]
    row.extend([''] * (len(PLAYER_COLUMNS) - len(row)))
    return row


def parse_players(page):
    """Return (away_cells, home_cells, player_ids) found on a box-score page.

    The first stats table on the page is treated as the away team and every
    later table as the home team.
    """
    away_cells, home_cells, player_ids = [], [], []
    is_home = False
    for block in BLOCK_RE.findall(page):
        target = home_cells if is_home else away_cells
        for row in ROW_RE.findall(block):
            # Try the "did not play" layout first, then the regular stat line.
            matches = DNP_ROW_RE.findall(row) or STAT_ROW_RE.findall(row)
            for fields in matches:
                player_ids.append(fields[0])
                target.extend(player_row(fields[1:]))
        is_home = True
    return away_cells, home_cells, player_ids


def parse_game(page, url):
    """Return the per-game row: line scores, team totals, game info, URL."""
    cells = []
    for pattern in (TOP_SCORES_RE, TOTALS_RE, GAME_INFO_RE):
        for groups in pattern.findall(page):
            cells.extend(cell.strip() for cell in groups)
    cells.append(url)
    return cells


def fetch(url):
    """Download *url* and return the response body."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def write_csv(prefix, startdate, enddate, rows):
    """Write *rows* to '<prefix><startdate>-<enddate>.txt' as CSV."""
    name = prefix + startdate + "-" + enddate + ".txt"
    # Binary mode is what the Python 2 csv module expects.
    with open(name, "wb") as out:
        csv.writer(out).writerows(rows)


def main():
    """Run the whole scrape; exits silently when dates.txt has no usable range."""
    with open(DATES_FILE, "rb") as dates_file:
        pair = find_date_pair(dates_file.read())
    if pair is None:
        sys.exit()

    # As before, the request file is emptied as soon as a date pair was found.
    open(DATES_FILE, "wb").close()

    try:
        dates_arr = expand_dates(pair)
    except ValueError:
        sys.exit()
    if not dates_arr:
        sys.exit()
    startdate, enddate = dates_arr[0], dates_arr[-1]

    # Collect the game ids of every day in the range.
    gids = []
    for date in dates_arr:
        page = fetch(SCOREBOARD_URL + date + '&refresh=0')
        gids.extend(GID_RE.findall(page))

    csv_home = [team_header('H')]
    csv_away = [team_header('A')]
    csv_box = [box_header()]
    csv_players = [PLAYERS_HEADER]

    players_urls = []
    seen_urls = set()
    for gid in gids:
        url = BOXSCORE_URL + gid
        page = fetch(url)
        away_cells, home_cells, player_ids = parse_players(page)
        for player_id in player_ids:
            player_url = PLAYER_URL + player_id
            # A player appears in many games; fetch each profile only once.
            if player_url not in seen_urls:
                seen_urls.add(player_url)
                players_urls.append(player_url)
        csv_home.append(home_cells)
        csv_away.append(away_cells)
        csv_box.append(parse_game(page, url))

    # Demographics for every player seen in the box scores.
    for player_url in players_urls:
        matches = PLAYER_RE.findall(fetch(player_url))
        if not matches:
            # Unexpected page layout: skip the player rather than crash.
            continue
        csv_players.append([cell.strip() for cell in matches[0]])

    write_csv("home", startdate, enddate, csv_home)
    write_csv("away", startdate, enddate, csv_away)
    write_csv("box", startdate, enddate, csv_box)
    write_csv("players", startdate, enddate, csv_players)


if __name__ == "__main__":
    main()
Скрипты — это небольшие программы, выполняющие несложные действия, например парсинг или обработку текста. Чаще всего скрипты ориентированы на использование в сети Интернет. Основными языками для написания скриптов являются PHP, Perl и Python.
Например, очень легко реализовать парсинг фидов с помощью несложного скрипта на PHP. Получится примерно такое: