# 10/10/07: by Cheng
# Convert an Affy SNP annotation file to a dChip genome information file, and
# reorder the SNPs according to the CDF file order (the "... psi.txt" file,
# which can be found in the library files containing the CDF file).
#
# The defaults below process the Nsp array.  To process the Sty array instead,
# replace "Nsp" with "Sty" in the three file names (or pass explicit paths to
# main()).

ANNOT_CSV = "Mapping250K_Nsp.na23.annot.csv"
PSI_FILE = "Mapping250K_Nsp psi.txt"
GENOME_INFO_OUT = "Mapping250k_Nsp_genome_info_hg18.txt"

# A progress counter is printed every PROGRESS_EVERY processed rows.
PROGRESS_EVERY = 1000


def _format_record(words):
    """Build one tab-separated output line from the fields of a CSV row.

    The output columns are CSV fields 0, 3, 4, an empty column, 5 and 2
    (presumably probe set ID, chromosome, physical position, strand and
    dbSNP RS ID -- verify against the annotation file's header row).
    """
    return "\t".join(
        (words[0], words[3], words[4], "", words[5], words[2])
    ) + "\n"


def read_annotation(lines):
    """Parse annotation CSV lines into output records keyed by field 0.

    Args:
        lines: iterable of text lines from the annotation CSV.

    Returns:
        A ``(header, info)`` tuple.  ``header`` is the formatted record of the
        first non-comment row (``None`` if there is none); ``info`` maps the
        first CSV field of every non-comment row (the header row included) to
        its formatted record.

    Raises:
        IndexError: if a non-blank, non-comment row has fewer than six fields.
    """
    info = {}
    header = None
    row_cnt = 0
    for line in lines:
        # Blank lines carry no record; the original indexing crashed on them.
        if not line.strip():
            continue
        # A '#' in either of the first two characters marks a comment line.
        if "#" in line[:2]:
            continue
        # Only the first six fields are used, so quotes are simply dropped
        # before splitting on commas.  The line ending is removed first so it
        # cannot end up inside the last field.
        words = line.rstrip("\r\n").replace('"', "").split(",")
        record = _format_record(words)
        info[words[0]] = record
        if row_cnt == 0:  # header line
            header = record
        if row_cnt % PROGRESS_EVERY == 0:
            print(row_cnt)
        row_cnt += 1
    return header, info


def write_in_psi_order(psi_lines, info, out):
    """Write the records of ``info`` to ``out`` in the order of ``psi_lines``.

    Args:
        psi_lines: iterable of tab-separated text lines; the second field of
            each line is looked up in ``info``.  Lines starting with '#' and
            blank lines are ignored.
        info: mapping from identifier to formatted record, as returned by
            ``read_annotation``.
        out: object with a ``write`` method that receives each matched record.

    Returns:
        The number of records written.
    """
    written = 0
    row_cnt = 0
    for line in psi_lines:
        if line.startswith("#") or not line.strip():
            continue
        words = line.rstrip("\r\n").split("\t")
        # Identifiers missing from the annotation are skipped silently.
        if len(words) > 1 and words[1] in info:
            out.write(info[words[1]])
            written += 1
        if row_cnt % PROGRESS_EVERY == 0:
            print(row_cnt)
        row_cnt += 1
    return written


def main(annot_path=ANNOT_CSV, psi_path=PSI_FILE, out_path=GENOME_INFO_OUT):
    """Convert ``annot_path`` to ``out_path``, ordered as in ``psi_path``.

    The output starts with the formatted header row of the annotation file,
    followed by one record per psi line whose identifier is annotated.
    """
    with open(annot_path, "r") as annot:
        header, info = read_annotation(annot)

    with open(out_path, "w") as out, open(psi_path, "r") as psi:
        if header is not None:
            out.write(header)
        write_in_psi_order(psi, info, out)


if __name__ == "__main__":
    main()