/**
 * Copyright 2013 Andy Turner, The University of Leeds, UK
 *
 * Redistribution and use of this software in source and binary forms, with or
 * without modification is permitted.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package uk.ac.leeds.ccg.andyt.projects.geog3600.rebecca;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StreamTokenizer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import uk.ac.leeds.ccg.andyt.generic.io.Generic_StaticIO;

/**
 * A class for adding coordinate data and area codes for UK postcodes. This
 * class was originally developed for Rebecca Dashfield for an undergraduate
 * Geography dissertation at the University of Leeds in 2013. Apart from
 * {@code Generic_StaticIO} (used to read and write serialised lookups) the
 * class relies only on the core Java language.
 *
 * The program first inputs the National Postcode Directory August 2011 data
 * which was downloaded via the EDINA UKBorders Service.
 *
 * In a revision a more recent ONS postcode directory was obtained in csv
 * format via the following URL:
 * http://www.ons.gov.uk/ons/guide-method/geography/products/postcode-directories/-nspp-/index.html
 *
 * The program also inputs data files that are processed by adding columns next
 * to the postcodes including the census output area codes and Ordnance Survey
 * Great Britain Grid Projection Easting and Northing coordinates.
 */
public class PostcodeGeocoder {

    /**
     * A name for exception and error handling
     */
    private static final String className = "PostcodeGeocoder";

    /**
     * Directory used when no command line argument is given.
     */
    private static final String DEFAULT_DIRECTORY
            = "/scratch01/Work/geog3600/Rebecca/";

    /**
     * Only postcode directory records whose postcode starts with this prefix
     * are loaded by {@link #readONSPD} and {@link #readONSPD2}.
     */
    private static final String POSTCODE_PREFIX = "PE31 8";

    /**
     * Separator between the quoted fields of a postcode directory record.
     */
    private static final String ONSPD_FIELD_SEPARATOR = "\",\"";

    /**
     * Index of the postcode field in a postcode directory record.
     */
    private static final int ONSPD_POSTCODE_FIELD = 2;

    /**
     * Index of the field parsed by {@link #getX(String)}.
     */
    private static final int ONSPD_EASTING_FIELD = 9;

    /**
     * Index of the field parsed by {@link #getY(String)}.
     */
    private static final int ONSPD_NORTHING_FIELD = 10;

    /**
     * File directory from which data is read and written to.
     */
    private final File directory;

    /**
     * Creates a new instance of this class using directory.
     *
     * @param directory the directory containing the "input" data, the
     * serialised lookups and the "output" directory
     */
    public PostcodeGeocoder(File directory) {
        this.directory = directory;
    }

    /**
     * This is the main method where the program begins execution. One or no
     * arguments are expected. A single argument gives the directory holding
     * the data; without it the hard coded default directory is used. The
     * program exits with status 2 if more than one argument is given or if
     * the directory does not exist. See {@link #run()} for the files that are
     * read and written relative to this directory.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        if (args.length > 1) {
            // Previously this case fell through with a null directory and
            // failed with a NullPointerException.
            System.err.println(
                    "Expected at most one argument (the data directory) but got "
                    + args.length + ". Program exiting.");
            System.exit(2);
        }
        File directory = new File(
                args.length == 1 ? args[0] : DEFAULT_DIRECTORY);
        if (!directory.exists()) {
            System.err.println(
                    "Directory " + directory
                    + " does not exist. Program exiting.");
            System.exit(2);
        }
        new PostcodeGeocoder(directory).run();
    }

    /**
     * Geocodes the second homes and primary addresses files.
     * <ol>
     * <li>Loads the serialised postcode lookup from
     * "lookup_HashmapStringString.thisFile" in the directory.</li>
     * <li>Reads the output area code lookup from
     * "input/ONSPD_FEB_2013_csv/Documents/Look-ups/oacode_new_to_old.txt" and
     * saves it as "oaCodeLookUp_HashmapStringString.thisFile".</li>
     * <li>Processes "input/Second homes.csv" and
     * "input/primaryaddresses.csv", writing files of the same names into
     * "output".</li>
     * </ol>
     */
    public void run() {
        File inputDirectory = new File(directory, "input");
        File outputDirectory = new File(directory, "output");
        // Make sure there is somewhere to write to. If this fails the
        // failure is reported when the first output file is opened.
        outputDirectory.mkdirs();
        HashSet<String> numerals_HashSet = getNumeralsHashSet();

        // The postcode lookup is expected to have been built already. It can
        // be rebuilt by calling readONSPD2 on
        // input/ONSPD_FEB_2013_csv/Data/ONSPD_FEB_2013_UK_O.csv and saving the
        // result to lookupFile using Generic_StaticIO.writeObject.
        File NPDDirectory = new File(inputDirectory, "ONSPD_FEB_2013_csv");
        File lookupFile = new File(
                directory, "lookup_HashmapStringString.thisFile");
        @SuppressWarnings("unchecked")
        HashMap<String, String> lookup
                = (HashMap<String, String>) Generic_StaticIO.readObject(lookupFile);

        // Read NPD OutputArea code mapping
        // There are two OA codes and this is a lookup from one to another.
        File NPDDocumentsDirectory = new File(NPDDirectory, "Documents");
        File NPDDocumentsLookUpsDirectory = new File(
                NPDDocumentsDirectory, "Look-ups");
        File oaCodeFile = new File(
                NPDDocumentsLookUpsDirectory, "oacode_new_to_old.txt");
        HashMap<String, String> oaCodeLookUp = readONSPD_OACodeLookup(
                oaCodeFile, numerals_HashSet);
        File oaCodeLookUpFile = new File(
                directory, "oaCodeLookUp_HashmapStringString.thisFile");
        Generic_StaticIO.writeObject(oaCodeLookUp, oaCodeLookUpFile);

        // Second homes
        String filename = "Second homes.csv";
        processSecondHomes(
                new File(inputDirectory, filename),
                new File(outputDirectory, filename),
                lookup,
                oaCodeLookUp);

        // Primary addresses
        filename = "primaryaddresses.csv";
        processPrimaryAddresses(
                new File(inputDirectory, filename),
                new File(outputDirectory, filename),
                lookup,
                oaCodeLookUp);
    }

    /**
     * Geocodes the second homes file. The first line of file is skipped. For
     * every other line a line is written to outputFile made of the output
     * area code, its mapped code, the 2011 output area code, its mapped code,
     * x, y and the input line, separated by ", ". Lines whose postcode is not
     * in lookup get empty codes and zero coordinates and are reported on
     * standard output. An IOException is reported and ends the program with
     * status 2.
     *
     * @param file the second homes csv file to read
     * @param outputFile the file to write
     * @param lookup postcode directory records keyed by postcode
     * @param oaCodeLookup lookup from one output area code to another; a code
     * that is not present is written as "null"
     */
    public void processSecondHomes(
            File file,
            File outputFile,
            HashMap<String, String> lookup,
            HashMap<String, String> oaCodeLookup) {
        geocodeFile(file, outputFile, lookup, oaCodeLookup,
                true, true, ".processSecondHomes(File)");
    }

    /**
     * Geocodes the second homes file. The first line of file is skipped. For
     * every other line a line is written to outputFile made of the output
     * area, ward, MSOA and LSOA codes, x, y and the input line, separated by
     * ", ". Lines whose postcode is not in lookup get empty codes and zero
     * coordinates and are reported on standard output. An IOException is
     * reported and ends the program with status 2.
     *
     * @param file the second homes csv file to read
     * @param outputFile the file to write
     * @param lookup postcode directory records keyed by postcode
     */
    public void processSecondHomes(
            File file,
            File outputFile,
            HashMap<String, String> lookup) {
        geocodeFile(file, outputFile, lookup, null,
                true, false, ".processSecondHomes(File)");
    }

    /**
     * Geocodes the primary addresses file. Behaves as
     * {@link #processSecondHomes(File, File, HashMap, HashMap)} except that
     * the postcode is read from a different column.
     *
     * @param file the primary addresses csv file to read
     * @param outputFile the file to write
     * @param lookup postcode directory records keyed by postcode
     * @param oaCodeLookup lookup from one output area code to another; a code
     * that is not present is written as "null"
     */
    public void processPrimaryAddresses(
            File file,
            File outputFile,
            HashMap<String, String> lookup,
            HashMap<String, String> oaCodeLookup) {
        geocodeFile(file, outputFile, lookup, oaCodeLookup,
                false, true, ".processPrimaryAddresses(File)");
    }

    /**
     * Geocodes the primary addresses file. Behaves as
     * {@link #processSecondHomes(File, File, HashMap)} except that the
     * postcode is read from a different column.
     *
     * @param file the primary addresses csv file to read
     * @param outputFile the file to write
     * @param lookup postcode directory records keyed by postcode
     */
    public void processPrimaryAddresses(
            File file,
            File outputFile,
            HashMap<String, String> lookup) {
        geocodeFile(file, outputFile, lookup, null,
                false, false, ".processPrimaryAddresses(File)");
    }

    /**
     * Shared implementation of the four process methods: skips the header
     * line of file and writes one geocoded line to outputFile for every
     * remaining line.
     *
     * @param file the csv file to read
     * @param outputFile the file to write
     * @param lookup postcode directory records keyed by postcode
     * @param oaCodeLookup output area code lookup; only used if oaCodeOutput
     * @param secondHomes true to use the second homes postcode column, false
     * to use the primary addresses postcode column
     * @param oaCodeOutput true to write the output area code columns, false
     * to write the ward, MSOA and LSOA columns
     * @param methodLabel suffix naming the calling method in error messages
     */
    private void geocodeFile(
            File file,
            File outputFile,
            HashMap<String, String> lookup,
            HashMap<String, String> oaCodeLookup,
            boolean secondHomes,
            boolean oaCodeOutput,
            String methodLabel) {
        PrintWriter pw = null;
        BufferedReader reader = null;
        try {
            pw = new PrintWriter(outputFile);
            reader = openReader(file);
            StreamTokenizer tokeniser = getStreamTokeniser(reader);
            //Skip the first line
            skipline(tokeniser);
            String line = "";
            while ((line = nextLine(tokeniser, line)) != null) {
                String postcode = secondHomes
                        ? extractSecondHomePostcode(line)
                        : extractPrimaryAddressPostcode(line);
                pw.println(geocodeLine(
                        line, postcode, lookup, oaCodeLookup, oaCodeOutput));
            }
            // PrintWriter never throws on write failure, so ask explicitly.
            if (pw.checkError()) {
                System.err.println("Error writing " + outputFile + " in "
                        + this.getClass().getName() + methodLabel);
            }
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName() + methodLabel);
            System.exit(2);
        } finally {
            if (pw != null) {
                pw.close();
            }
            closeQuietly(reader);
        }
    }

    /**
     * Builds the output line for one input line.
     *
     * NOTE(review): fields 33, 35, 37, 39 and 41 of the postcode directory
     * record are taken to be the output area, ward, MSOA, LSOA and 2011
     * output area codes, following the original variable names. Confirm
     * against the user guide of the directory release used to build lookup.
     *
     * @param line the input line
     * @param postcode the postcode extracted from line
     * @param lookup postcode directory records keyed by postcode
     * @param oaCodeLookup output area code lookup; only used if oaCodeOutput
     * @param oaCodeOutput selects which code columns are written
     * @return the codes and coordinates followed by line, separated by ", "
     */
    private String geocodeLine(
            String line,
            String postcode,
            HashMap<String, String> lookup,
            HashMap<String, String> oaCodeLookup,
            boolean oaCodeOutput) {
        String oa = "";
        String oaMapped = "";
        String oa2011 = "";
        String oa2011Mapped = "";
        String ward = "";
        String msoa = "";
        String lsoa = "";
        long x = 0;
        long y = 0;
        String record = lookup.get(postcode);
        if (record == null) {
            System.out.println("postcodelookup == null");
            System.out.println("postcode " + postcode);
            System.out.println(line);
        } else {
            String[] fields = record.split(ONSPD_FIELD_SEPARATOR);
            oa = fields[33];
            ward = fields[35];
            msoa = fields[37];
            lsoa = fields[39];
            if (oaCodeOutput) {
                oaMapped = oaCodeLookup.get(oa);
                oa2011 = fields[41];
                oa2011Mapped = oaCodeLookup.get(oa2011);
            }
            x = getX(record);
            y = getY(record);
        }
        if (oaCodeOutput) {
            return oa + ", " + oaMapped + ", " + oa2011 + ", " + oa2011Mapped
                    + ", " + x + ", " + y + ", " + line;
        }
        return oa + ", " + ward + ", " + msoa + ", " + lsoa
                + ", " + x + ", " + y + ", " + line;
    }

    /**
     * Extracts the postcode from a line of the second homes file. If the line
     * contains a double quote the postcode is the sixth comma separated value
     * of the text following the second double quote. Otherwise it is the
     * seventh comma separated value of the line, or the sixth if there are
     * only six. A line with too few values gives an empty postcode.
     *
     * @param line a line of the second homes file
     * @return the postcode without one leading space, or "" if not present
     */
    private static String extractSecondHomePostcode(String line) {
        String postcode;
        if (line.contains("\"")) {
            postcode = fieldOrBlank(fieldsAfterQuotedValue(line), 5);
        } else {
            String[] fields = line.split(",");
            if (fields.length >= 7) {
                postcode = fields[6];
            } else if (fields.length == 6) {
                postcode = fields[5];
            } else {
                System.out.println(line);
                postcode = "";
            }
        }
        return stripLeadingSpace(postcode);
    }

    /**
     * Extracts the postcode from a line of the primary addresses file. If the
     * line contains a double quote the postcode is the fourth comma separated
     * value of the text following the second double quote. Otherwise it is
     * the fourth comma separated value of the line. A line with too few
     * values gives an empty postcode.
     *
     * @param line a line of the primary addresses file
     * @return the postcode without one leading space, or "" if not present
     */
    private static String extractPrimaryAddressPostcode(String line) {
        String[] fields = line.contains("\"")
                ? fieldsAfterQuotedValue(line)
                : line.split(",");
        return stripLeadingSpace(fieldOrBlank(fields, 3));
    }

    /**
     * Splits on commas the text that follows the second double quote in line.
     *
     * @param line a line containing at least one double quote
     * @return the comma separated values after the second double quote, or
     * an empty array if there is no such text (previously this case failed
     * with an ArrayIndexOutOfBoundsException)
     */
    private static String[] fieldsAfterQuotedValue(String line) {
        String[] quoteParts = line.split("\"");
        if (quoteParts.length < 3) {
            return new String[0];
        }
        return quoteParts[2].split(",");
    }

    /**
     * @param fields the values to index
     * @param index the index wanted
     * @return fields[index], or a single space if there is no such value
     */
    private static String fieldOrBlank(String[] fields, int index) {
        return index < fields.length ? fields[index] : " ";
    }

    /**
     * @param s the text to trim
     * @return s without its first character if that character is a space,
     * otherwise s
     */
    private static String stripLeadingSpace(String s) {
        if (s.length() > 0 && s.charAt(0) == ' ') {
            return s.substring(1);
        }
        return s;
    }

    /**
     * Parses field 9 of a postcode directory record as a whole number.
     *
     * @param s a postcode directory record with fields separated by
     * {@code ","}
     * @return the value of field 9
     * @throws NumberFormatException if the field is not a whole number
     * @throws ArrayIndexOutOfBoundsException if s has fewer than 10 fields
     */
    public long getX(String s) {
        String[] fields = s.split(ONSPD_FIELD_SEPARATOR);
        return Long.parseLong(fields[ONSPD_EASTING_FIELD]);
    }

    /**
     * Parses field 10 of a postcode directory record as a whole number.
     *
     * @param s a postcode directory record with fields separated by
     * {@code ","}
     * @return the value of field 10
     * @throws NumberFormatException if the field is not a whole number
     * @throws ArrayIndexOutOfBoundsException if s has fewer than 11 fields
     */
    public long getY(String s) {
        String[] fields = s.split(ONSPD_FIELD_SEPARATOR);
        return Long.parseLong(fields[ONSPD_NORTHING_FIELD]);
    }

    /**
     * Reads the records of a postcode directory file whose postcode starts
     * with "PE31 8". Each record kept is reported on standard output. Lines
     * with too few fields to hold a postcode and coordinates are skipped. An
     * IOException is reported and ends the program with status 2.
     *
     * @param file the postcode directory csv file
     * @param numerals_HashSet not used; kept for compatibility
     * @return the records kept, keyed by postcode
     */
    public HashMap<String, String> readONSPD(
            File file,
            HashSet<String> numerals_HashSet) {
        return readPostcodeRecords(file, -1, ".readONSPD(File)");
    }

    /**
     * As {@link #readONSPD(File, HashSet)} but additionally reports on
     * standard output every line that does not have exactly 46 fields.
     *
     * @param file the postcode directory csv file
     * @param numerals_HashSet not used; kept for compatibility
     * @return the records kept, keyed by postcode
     */
    public HashMap<String, String> readONSPD2(
            File file,
            HashSet<String> numerals_HashSet) {
        return readPostcodeRecords(file, 46, ".readONSPD2(File)");
    }

    /**
     * Shared implementation of readONSPD and readONSPD2.
     *
     * @param file the postcode directory csv file
     * @param expectedFieldCount lines with a different number of fields are
     * reported on standard output; negative to report nothing
     * @param methodLabel suffix naming the calling method in error messages
     * @return the records whose postcode starts with the wanted prefix,
     * keyed by postcode
     */
    private HashMap<String, String> readPostcodeRecords(
            File file,
            int expectedFieldCount,
            String methodLabel) {
        HashMap<String, String> result = new HashMap<String, String>();
        BufferedReader reader = null;
        try {
            reader = openReader(file);
            StreamTokenizer tokeniser = getStreamTokeniser(reader);
            int lineCounter = 0;
            int recordCounter = 0;
            String line = "";
            while ((line = nextLine(tokeniser, line)) != null) {
                String[] fields = line.split(ONSPD_FIELD_SEPARATOR);
                if (expectedFieldCount >= 0
                        && fields.length != expectedFieldCount) {
                    System.out.println(lineCounter + " " + line);
                    System.out.println("fields.length " + fields.length);
                }
                // The length check stops a blank or truncated line from
                // ending the run with an ArrayIndexOutOfBoundsException.
                if (fields.length > ONSPD_NORTHING_FIELD
                        && fields[ONSPD_POSTCODE_FIELD].startsWith(POSTCODE_PREFIX)) {
                    System.out.println(recordCounter + ", " + lineCounter
                            + ", " + fields[ONSPD_POSTCODE_FIELD]
                            + ", " + fields[ONSPD_EASTING_FIELD]
                            + ", " + fields[ONSPD_NORTHING_FIELD]);
                    result.put(fields[ONSPD_POSTCODE_FIELD], line);
                    recordCounter++;
                }
                lineCounter++;
            }
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName() + methodLabel);
            System.exit(2);
        } finally {
            closeQuietly(reader);
        }
        return result;
    }

    /**
     * Reads a lookup from one output area code to another. Each line is
     * split on a single space; the first value is the key and the second the
     * value. Lines that do not have exactly two values are reported on
     * standard output, and those with fewer than two are skipped. An
     * IOException is reported and ends the program with status 2.
     *
     * @param file the lookup text file
     * @param numerals_HashSet not used; kept for compatibility
     * @return the lookup
     */
    public HashMap<String, String> readONSPD_OACodeLookup(
            File file,
            HashSet<String> numerals_HashSet) {
        HashMap<String, String> result = new HashMap<String, String>();
        BufferedReader reader = null;
        try {
            reader = openReader(file);
            StreamTokenizer tokeniser = getStreamTokeniser(reader);
            int lineCounter = 0;
            String line = "";
            while ((line = nextLine(tokeniser, line)) != null) {
                String[] fields = line.split(" ");
                if (fields.length != 2) {
                    System.out.println(lineCounter + " " + line);
                    System.out.println("fields.length " + fields.length);
                }
                if (fields.length >= 2) {
                    result.put(fields[0], fields[1]);
                }
                lineCounter++;
            }
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName()
                    + ".readONSPD_OACodeLookup(File)");
            System.exit(2);
        } finally {
            closeQuietly(reader);
        }
        return result;
    }

    /**
     * Parses a whole number that may contain commas, for example "1,234".
     *
     * @param s the text to parse
     * @return the number, or 0 if s holds nothing but commas
     * @throws NumberFormatException if what remains once commas are removed
     * is not a whole number
     */
    private long format(String s) {
        String digits = s.replace(",", "");
        if (digits.length() == 0) {
            return 0L;
        }
        return Long.parseLong(digits);
    }

    /**
     * @return a new set holding the strings "0" to "9"
     */
    public HashSet<String> getNumeralsHashSet() {
        HashSet<String> numerals_HashSet = new HashSet<String>();
        for (int i = 0; i <= 9; i++) {
            numerals_HashSet.add(Integer.toString(i));
        }
        return numerals_HashSet;
    }

    /**
     * Reads and discards tokens up to and including the next end of line.
     * Also stops at the end of the input, so an empty or single line file
     * without a line end no longer loops forever. An IOException is logged
     * and otherwise ignored.
     *
     * @param aStreamTokenizer the tokenizer to advance
     */
    private void skipline(StreamTokenizer aStreamTokenizer) {
        try {
            int tokenType = aStreamTokenizer.nextToken();
            while (tokenType != StreamTokenizer.TT_EOL
                    && tokenType != StreamTokenizer.TT_EOF) {
                tokenType = aStreamTokenizer.nextToken();
            }
        } catch (IOException ex) {
            Logger.getLogger(PostcodeGeocoder.class.getName()).log(
                    Level.SEVERE, null, ex);
        }
    }

    /**
     * Advances tokeniser to the next end of line and returns the last word
     * token seen on the way.
     *
     * Quirks kept from the original reading loop:
     * <ul>
     * <li>An empty line has no word token, so previousLine is returned
     * again.</li>
     * <li>A character that is not a word character ends a word token, so
     * only the text after the last such character on the line is
     * returned.</li>
     * <li>Text after the final line end of the input is never returned.</li>
     * </ul>
     *
     * @param tokeniser a tokenizer set up by getStreamTokeniser
     * @param previousLine what to return if no word token is seen
     * @return the line, or null if the end of the input is reached first
     * @throws IOException if reading fails
     */
    private static String nextLine(
            StreamTokenizer tokeniser,
            String previousLine) throws IOException {
        String line = previousLine;
        int tokenType = tokeniser.nextToken();
        while (tokenType != StreamTokenizer.TT_EOF) {
            if (tokenType == StreamTokenizer.TT_EOL) {
                return line;
            }
            if (tokenType == StreamTokenizer.TT_WORD) {
                line = tokeniser.sval;
            }
            tokenType = tokeniser.nextToken();
        }
        return null;
    }

    /**
     * Opens file for reading using the platform default character set.
     *
     * @param file the file to open
     * @return a reader the caller must close
     * @throws IOException if file cannot be opened
     */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(
                new InputStreamReader(
                new FileInputStream(file)));
    }

    /**
     * Closes reader, logging rather than throwing any failure.
     *
     * @param reader the reader to close; may be null
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ex) {
            Logger.getLogger(PostcodeGeocoder.class.getName()).log(
                    Level.WARNING, null, ex);
        }
    }

    /**
     * Creates a tokenizer that returns runs of the word characters set below
     * as single word tokens. After resetSyntax the line feed character is an
     * ordinary character, and its code is the value of TT_EOL, so every line
     * feed is returned as TT_EOL.
     *
     * @param reader the source of characters
     * @return the tokenizer
     */
    private static StreamTokenizer getStreamTokeniser(BufferedReader reader) {
        StreamTokenizer aStreamTokenizer = new StreamTokenizer(reader);
        aStreamTokenizer.resetSyntax();
        aStreamTokenizer.wordChars(',', ',');
        aStreamTokenizer.wordChars('"', '"');
        aStreamTokenizer.wordChars('\'', '\'');
        aStreamTokenizer.wordChars('&', '&');
        aStreamTokenizer.wordChars(';', ';');
        aStreamTokenizer.wordChars('(', '(');
        aStreamTokenizer.wordChars(')', ')');
        aStreamTokenizer.wordChars('0', '9');
        aStreamTokenizer.wordChars('.', '.');
        aStreamTokenizer.wordChars('-', '-');
        aStreamTokenizer.wordChars('+', '+');
        aStreamTokenizer.wordChars('a', 'z');
        aStreamTokenizer.wordChars('A', 'Z');
        aStreamTokenizer.wordChars('\t', '\t');
        aStreamTokenizer.wordChars(' ', ' ');
        aStreamTokenizer.wordChars('#', '#');
        aStreamTokenizer.wordChars('*', '*');
        aStreamTokenizer.wordChars(':', ':');
        aStreamTokenizer.wordChars('/', '/');
        aStreamTokenizer.eolIsSignificant(true);
        return aStreamTokenizer;
    }
}