/** * Copyright 2012 Andy Turner, The University of Leeds, UK * * Redistribution and use of this software in source and binary forms, with or * without modification is permitted. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package uk.ac.leeds.ccg.andyt.projects.geog3600.greg; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.StreamTokenizer; import java.math.BigDecimal; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.logging.Level; import java.util.logging.Logger; /** * A class for aggregating Postcode data for the UK. This class was originally * developed for Gregory Bromley for an undergraduate Geography dissertation at * the University of Leeds in 2012. The class is self contained and relies only * on the core Java language. (The program does not rely on any other libraries * to run). * * * * The program was developed to use 3 look up tables which were made available * by the EDINA UKBorders Service. 
/**
 * These data provide codes and names for producing aggregate outputs for
 * Standard Wards in Scotland which took into account output area
 * classifications as produced by Dan Vickers.
 *
 * <p>
 * NOTE: the look up table description, and earlier documentation of a single
 * output file named "AggregatedData.csv" (header: STWardCode,"STWardName",
 * "Council Area Name",NumberOfOutputAreas,TotalPopulation,
 * TotalProsperingSuburbPopulation), do not match the code in this class and
 * appear to describe an earlier version of the program.
 *
 * <p>
 * As implemented, the class reads three ASCII comma separated files from one
 * directory ("Grocery Retailers Nov2011 - final.csv", "Population
 * Scotland.csv" and "Population England + Wales.csv"), totals retail floor
 * space and population by postcode area, postcode district and postcode
 * sector, and writes one comma separated file per level
 * ("PostcodeAreaGroceryRetailerData.csv",
 * "PostcodeDistrictGroceryRetailerData.csv" and
 * "PostcodeSectorGroceryRetailerData.csv") into the same directory. Each
 * output has the columns: postcode key, Total Population, Total Floor Area.
 */
public class PostcodeAggregator {

    /**
     * A name for exception and error handling.
     */
    private static final String className = "PostcodeAggregator";

    /**
     * Name of the grocery retailer input file.
     */
    private static final String GROCERY_RETAILERS_FILENAME
            = "Grocery Retailers Nov2011 - final.csv";

    /**
     * Name of the Scotland population input file.
     */
    private static final String SCOTLAND_POPULATION_FILENAME
            = "Population Scotland.csv";

    /**
     * Name of the England and Wales population input file.
     */
    private static final String ENGLAND_AND_WALES_POPULATION_FILENAME
            = "Population England + Wales.csv";

    /**
     * Zero based index of the unit postcode field in the grocery retailer
     * file.
     */
    private static final int GROCERY_POSTCODE_FIELD = 5;

    /**
     * Zero based index of the floor space field in the grocery retailer file.
     */
    private static final int GROCERY_FLOORSPACE_FIELD = 10;

    /**
     * Number of leading lines skipped in the England and Wales population
     * file.
     */
    private static final int ENGLAND_AND_WALES_HEADER_LINES = 5;

    /**
     * Output file names, indexed like the reader results (0 = area, 1 =
     * district, 2 = sector).
     */
    private static final String[] OUTPUT_FILENAMES = {
        "PostcodeAreaGroceryRetailerData.csv",
        "PostcodeDistrictGroceryRetailerData.csv",
        "PostcodeSectorGroceryRetailerData.csv"
    };

    /**
     * First column titles of the output files, indexed like
     * {@link #OUTPUT_FILENAMES}.
     */
    private static final String[] OUTPUT_FIRST_COLUMNS = {
        "Postcode Area",
        "Postcode District",
        "Postcode Sector"
    };

    /**
     * File directory from which data is read and written to.
     */
    private final File directory;

    /**
     * Creates a new instance of this class using directory.
     *
     * @param directory the directory holding the input files; the output
     * files are written here too
     */
    public PostcodeAggregator(File directory) {
        this.directory = directory;
    }

    /**
     * This is the main method where the program begins execution. One or no
     * arguments are expected. A single argument gives the file location of the
     * input data. Without this argument it is assumed that the program is run
     * from the location of the input data (the "user.dir" system property).
     * The output will also be written to this location. The program exits with
     * status 2 if more than one argument is given, if the location is not a
     * listable directory, or if any of the three input files is missing.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        File directory;
        if (args.length == 0) {
            directory = new File(System.getProperty("user.dir"));
        } else if (args.length == 1) {
            directory = new File(args[0]);
        } else {
            // Previously this case left directory null and failed with a
            // NullPointerException.
            System.err.println(
                    "Usage: java " + className + " [directory]. "
                    + "Program exiting.");
            System.exit(2);
            return;
        }
        if (!directory.exists()) {
            System.err.println(
                    "Directory " + directory
                    + " does not exist. Program exiting.");
            System.exit(2);
        }
        // listFiles() returns null when the path is not a directory or cannot
        // be listed.
        File[] files = directory.listFiles();
        if (files == null) {
            System.err.println(
                    "Directory " + directory
                    + " cannot be listed. Program exiting.");
            System.exit(2);
            return;
        }
        HashSet<File> files_HashSet = new HashSet<File>(Arrays.asList(files));
        checkForFile(files_HashSet, directory, GROCERY_RETAILERS_FILENAME);
        checkForFile(files_HashSet, directory, SCOTLAND_POPULATION_FILENAME);
        checkForFile(files_HashSet, directory,
                ENGLAND_AND_WALES_POPULATION_FILENAME);
        new PostcodeAggregator(directory).run();
    }

    /**
     * Exits the program with status 2 unless the named file is one of the
     * listed directory entries.
     *
     * @param files_HashSet the entries of directory
     * @param directory the directory that was listed
     * @param filename the name of the file expected in directory
     */
    private static void checkForFile(
            HashSet<File> files_HashSet,
            File directory,
            String filename) {
        File file = new File(directory, filename);
        if (!files_HashSet.contains(file)) {
            System.err.println(
                    "Directory " + directory
                    + " does not contain file " + filename
                    + " in " + className + ". "
                    + "Program exiting.");
            System.exit(2);
        }
    }

    /**
     * Reads the three input files, combines the two population data sets and
     * writes the area, district and sector output files. Populations found
     * under the same key in both the Scotland and the England and Wales data
     * are added together. Every output line is also echoed to standard
     * output.
     */
    public void run() {
        HashSet<String> numerals_HashSet = getNumeralsHashSet();
        Object[] groceryData = readGroceryRetailers_File(
                new File(directory, GROCERY_RETAILERS_FILENAME),
                numerals_HashSet);
        Object[] scotlandData = readScotlandPopulation(
                new File(directory, SCOTLAND_POPULATION_FILENAME),
                numerals_HashSet);
        Object[] englandAndWalesData = readEnglandAndWalesPopulation(
                new File(directory, ENGLAND_AND_WALES_POPULATION_FILENAME),
                numerals_HashSet);
        for (int level = 0; level < OUTPUT_FILENAMES.length; level++) {
            HashMap<String, Long> population = sumPopulations(
                    toPopulationMap(scotlandData[level]),
                    toPopulationMap(englandAndWalesData[level]));
            writeOutput(
                    new File(directory, OUTPUT_FILENAMES[level]),
                    OUTPUT_FIRST_COLUMNS[level],
                    population,
                    toFloorspaceMap(groceryData[level]));
        }
    }

    /**
     * Reads the grocery retailer file. The first line is skipped as a header.
     * Each remaining line is split on commas; field 5 is used as the unit
     * postcode and field 10 as the floor space. Blank lines are ignored and
     * lines with too few fields or without a two part postcode are reported on
     * standard error and skipped. An I/O failure ends the program with
     * status 2.
     *
     * @param file the grocery retailer file
     * @param numerals_HashSet the numerals used by
     * {@link #getPostCodeArea(HashSet, String)}
     * @return an array of three {@code HashMap<String, BigDecimal>} of total
     * floor space keyed by postcode area [0], postcode district [1] and
     * postcode sector [2]
     */
    public Object[] readGroceryRetailers_File(
            File file,
            HashSet<String> numerals_HashSet) {
        HashMap<String, BigDecimal> postcodeArea_HashMap
                = new HashMap<String, BigDecimal>();
        HashMap<String, BigDecimal> postcodeDistrict_HashMap
                = new HashMap<String, BigDecimal>();
        HashMap<String, BigDecimal> postcodeSector_HashMap
                = new HashMap<String, BigDecimal>();
        BufferedReader reader = null;
        try {
            reader = openReader(file);
            reader.readLine(); // Skip the header line
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().length() == 0) {
                    continue;
                }
                // A limit of -1 keeps trailing empty fields so field indices
                // stay stable.
                String[] fields = line.split(",", -1);
                String[] postcode = null;
                if (fields.length > GROCERY_FLOORSPACE_FIELD) {
                    postcode = splitPostcode(fields[GROCERY_POSTCODE_FIELD]);
                }
                if (postcode == null) {
                    warnSkipped(file, lineNumber, line);
                    continue;
                }
                BigDecimal floorSpace
                        = getFloorspace(fields[GROCERY_FLOORSPACE_FIELD]);
                addFloorspace(postcodeArea_HashMap,
                        getPostCodeArea(numerals_HashSet, postcode[0]),
                        floorSpace);
                addFloorspace(postcodeDistrict_HashMap, postcode[0],
                        floorSpace);
                addFloorspace(postcodeSector_HashMap, postcode[1],
                        floorSpace);
            }
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName()
                    + ".readGroceryRetailers_File(File, HashSet)");
            System.exit(2);
        } finally {
            closeQuietly(reader);
        }
        return new Object[]{
            postcodeArea_HashMap,
            postcodeDistrict_HashMap,
            postcodeSector_HashMap};
    }

    /**
     * Derives the postcode area from the first part of a postcode. At most the
     * first two characters are considered; if the two character prefix ends
     * with one of the numerals only the first character is returned.
     *
     * @param numerals_HashSet the strings regarded as numerals
     * @param firstPartOfPostCode the part of a postcode before the space, for
     * example "LS2"
     * @return the postcode area, for example "LS" for "LS2" and "B" for "B1"
     */
    public String getPostCodeArea(
            HashSet<String> numerals_HashSet,
            String firstPartOfPostCode) {
        String prefix = firstPartOfPostCode;
        if (prefix.length() > 2) {
            prefix = prefix.substring(0, 2);
        }
        // Only a two character prefix is shortened. The earlier recursive
        // form never terminated once a single remaining character was itself
        // a numeral.
        if (prefix.length() == 2) {
            Iterator<String> ite = numerals_HashSet.iterator();
            while (ite.hasNext()) {
                if (prefix.endsWith(ite.next())) {
                    return prefix.substring(0, 1);
                }
            }
        }
        return prefix;
    }

    /**
     * Parses a floor space value.
     *
     * @param s the text of the floor space field
     * @return the parsed value, or zero if s is null or not a number
     */
    private BigDecimal getFloorspace(String s) {
        if (s == null) {
            return BigDecimal.ZERO;
        }
        try {
            // Trim first: BigDecimal rejects surrounding white space, which
            // would otherwise silently turn a valid number into zero.
            return new BigDecimal(s.trim());
        } catch (NumberFormatException e) {
            return BigDecimal.ZERO;
        }
    }

    /**
     * Reads the Scotland population file. The first line is skipped as a
     * header. Each remaining line is split on commas; field 0 is used as the
     * postcode key (district, a space, then the sector) and field 1 as the
     * population. Blank lines are ignored and lines with too few fields or
     * without a two part key are reported on standard error and skipped. An
     * I/O failure ends the program with status 2.
     *
     * @param file the Scotland population file
     * @param numerals_HashSet the numerals used by
     * {@link #getPostCodeArea(HashSet, String)}
     * @return an array of three {@code HashMap<String, Long>} of total
     * population keyed by postcode area [0], postcode district [1] and
     * postcode sector [2]
     * @throws NumberFormatException if a population field is not a whole
     * number
     */
    public Object[] readScotlandPopulation(
            File file,
            HashSet<String> numerals_HashSet) {
        HashMap<String, Long> postcodeArea_HashMap
                = new HashMap<String, Long>();
        HashMap<String, Long> postcodeDistrict_HashMap
                = new HashMap<String, Long>();
        HashMap<String, Long> postcodeSector_HashMap
                = new HashMap<String, Long>();
        BufferedReader reader = null;
        try {
            reader = openReader(file);
            reader.readLine(); // Skip the header line
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fields = line.split(",");
                String[] postcode = null;
                if (fields.length >= 2) {
                    postcode = splitPostcode(fields[0]);
                }
                if (postcode == null) {
                    warnSkipped(file, lineNumber, line);
                    continue;
                }
                long population = Long.parseLong(fields[1].trim());
                addPopulation(postcodeArea_HashMap,
                        getPostCodeArea(numerals_HashSet, postcode[0]),
                        population);
                addPopulation(postcodeDistrict_HashMap, postcode[0],
                        population);
                addPopulation(postcodeSector_HashMap, postcode[1],
                        population);
            }
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName()
                    + ".readScotlandPopulation(File, HashSet)");
            System.exit(2);
        } finally {
            closeQuietly(reader);
        }
        return new Object[]{
            postcodeArea_HashMap,
            postcodeDistrict_HashMap,
            postcodeSector_HashMap};
    }

    /**
     * Reads the England and Wales population file. The first five lines are
     * skipped. Only lines whose text before the first double quote starts with
     * a space are treated as data. For those lines the text before the first
     * double quote holds the postcode and the text inside the quotes holds the
     * population (which may contain thousands separators); if the line has no
     * double quotes it is split on commas instead and fields 0 and 1 are used.
     * Data lines without a two part postcode or without a population field are
     * reported on standard error and skipped. An I/O failure ends the program
     * with status 2.
     *
     * @param file the England and Wales population file
     * @param numerals_HashSet the numerals used by
     * {@link #getPostCodeArea(HashSet, String)}
     * @return an array of three {@code HashMap<String, Long>} of total
     * population keyed by postcode area [0], postcode district [1] and
     * postcode sector [2]
     * @throws NumberFormatException if a population field is not a whole
     * number once commas are removed
     */
    public Object[] readEnglandAndWalesPopulation(
            File file,
            HashSet<String> numerals_HashSet) {
        HashMap<String, Long> postcodeArea_HashMap
                = new HashMap<String, Long>();
        HashMap<String, Long> postcodeDistrict_HashMap
                = new HashMap<String, Long>();
        HashMap<String, Long> postcodeSector_HashMap
                = new HashMap<String, Long>();
        BufferedReader reader = null;
        try {
            reader = openReader(file);
            for (int i = 0; i < ENGLAND_AND_WALES_HEADER_LINES; i++) {
                reader.readLine();
            }
            int lineNumber = ENGLAND_AND_WALES_HEADER_LINES;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fields = line.split("\"");
                // Rows that do not begin with a space (this includes the
                // ",,,,,,,," separator rows) are not data rows.
                if (fields.length == 0 || !fields[0].startsWith(" ")) {
                    continue;
                }
                if (fields.length < 2) {
                    // For these records the delimiter is ","
                    fields = line.split(",");
                }
                String[] postcode = null;
                if (fields.length >= 2) {
                    postcode = splitPostcode(fields[0]);
                }
                if (postcode == null) {
                    warnSkipped(file, lineNumber, line);
                    continue;
                }
                long population = formatEnglandAndWalesPopulation(fields[1]);
                addPopulation(postcodeArea_HashMap,
                        getPostCodeArea(numerals_HashSet, postcode[0]),
                        population);
                addPopulation(postcodeDistrict_HashMap, postcode[0],
                        population);
                addPopulation(postcodeSector_HashMap, postcode[1],
                        population);
            }
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName()
                    + ".readEnglandAndWalesPopulation(File, HashSet)");
            System.exit(2);
        } finally {
            closeQuietly(reader);
        }
        return new Object[]{
            postcodeArea_HashMap,
            postcodeDistrict_HashMap,
            postcodeSector_HashMap};
    }

    /**
     * Parses a population that may contain comma thousands separators.
     *
     * @param s the text of the population field, for example "1,234"
     * @return the population, or 0 if nothing remains once commas and
     * surrounding white space are removed
     * @throws NumberFormatException if the remaining text is not a whole
     * number
     */
    private long formatEnglandAndWalesPopulation(String s) {
        String population = s.replace(",", "").trim();
        if (population.length() == 0) {
            return 0L;
        }
        return Long.parseLong(population);
    }

    /**
     * @return a new set holding the ten strings "0" to "9"
     */
    public HashSet<String> getNumeralsHashSet() {
        HashSet<String> numerals_HashSet = new HashSet<String>();
        for (int numeral = 0; numeral <= 9; numeral++) {
            numerals_HashSet.add(Integer.toString(numeral));
        }
        return numerals_HashSet;
    }

    /**
     * Splits a postcode into its district and sector.
     *
     * @param postcode text starting with a postcode whose two parts are
     * separated by white space, for example "LS2 9JT" or " LS2 9,"
     * @return the district and the sector (the district, a space and the first
     * character of the second part), or null if there are not two parts
     */
    private static String[] splitPostcode(String postcode) {
        // Splitting on any run of white space tolerates padded and double
        // spaced postcodes.
        String[] parts = postcode.trim().split("\\s+");
        if (parts.length < 2 || parts[0].length() == 0) {
            return null;
        }
        return new String[]{
            parts[0],
            parts[0] + " " + parts[1].substring(0, 1)};
    }

    /**
     * Adds amount to the total held for key, starting from zero if key is new.
     */
    private static void addPopulation(
            HashMap<String, Long> totals,
            String key,
            long amount) {
        Long existing = totals.get(key);
        if (existing != null) {
            amount += existing.longValue();
        }
        totals.put(key, Long.valueOf(amount));
    }

    /**
     * Adds amount to the total held for key, starting from zero if key is new.
     */
    private static void addFloorspace(
            HashMap<String, BigDecimal> totals,
            String key,
            BigDecimal amount) {
        BigDecimal existing = totals.get(key);
        totals.put(key, existing == null ? amount : existing.add(amount));
    }

    /**
     * Returns a new map holding, for every key of either argument, the sum of
     * the populations held for that key. The arguments are not modified.
     */
    private static HashMap<String, Long> sumPopulations(
            HashMap<String, Long> first,
            HashMap<String, Long> second) {
        HashMap<String, Long> total = new HashMap<String, Long>(first);
        Iterator<String> ite = second.keySet().iterator();
        while (ite.hasNext()) {
            String key = ite.next();
            addPopulation(total, key, second.get(key).longValue());
        }
        return total;
    }

    /**
     * Casts one element of a population reader result back to its map type.
     */
    @SuppressWarnings("unchecked")
    private static HashMap<String, Long> toPopulationMap(Object map) {
        return (HashMap<String, Long>) map;
    }

    /**
     * Casts one element of the grocery reader result back to its map type.
     */
    @SuppressWarnings("unchecked")
    private static HashMap<String, BigDecimal> toFloorspaceMap(Object map) {
        return (HashMap<String, BigDecimal>) map;
    }

    /**
     * Writes one output file: a header line followed by one line per
     * population key, in ascending key order, giving the key, its population
     * and its floor space (0 where floorArea has no entry for the key). Every
     * line is also printed to standard output. A failure to open or write the
     * file ends the program with status 2.
     */
    private void writeOutput(
            File outputFile,
            String firstColumnName,
            HashMap<String, Long> population,
            HashMap<String, BigDecimal> floorArea) {
        PrintWriter pw;
        try {
            pw = new PrintWriter(outputFile);
        } catch (IOException aIOException) {
            System.err.println(aIOException.getMessage() + " in "
                    + this.getClass().getName() + ".run()");
            System.exit(2);
            return;
        }
        boolean failed;
        try {
            String line = firstColumnName + ",Total Population,Total Floor Area";
            System.out.println(line);
            pw.println(line);
            // Sorted so that the output does not depend on hash order.
            String[] postcodes
                    = population.keySet().toArray(new String[population.size()]);
            Arrays.sort(postcodes);
            for (int i = 0; i < postcodes.length; i++) {
                BigDecimal retailFloorAreaEstimate
                        = floorArea.get(postcodes[i]);
                if (retailFloorAreaEstimate == null) {
                    retailFloorAreaEstimate = BigDecimal.ZERO;
                }
                line = postcodes[i] + ","
                        + population.get(postcodes[i]) + ","
                        + retailFloorAreaEstimate;
                System.out.println(line);
                pw.println(line);
            }
            // PrintWriter never throws on write; checkError() flushes and
            // reports whether any write failed.
            failed = pw.checkError();
        } finally {
            pw.close();
        }
        if (failed) {
            System.err.println("Error writing " + outputFile + " in "
                    + this.getClass().getName() + ".run()");
            System.exit(2);
        }
    }

    /**
     * Reports on standard error that a line of an input file is being skipped.
     */
    private void warnSkipped(File file, int lineNumber, String line) {
        System.err.println("Skipping malformed line " + lineNumber + " of "
                + file + " in " + className + ": " + line);
    }

    /**
     * Opens file for line by line reading using the platform default
     * character set.
     */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(
                new InputStreamReader(
                new FileInputStream(file)));
    }

    /**
     * Closes reader if it is not null. A failure to close is ignored as the
     * data has already been read.
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // Nothing useful can be done here.
            }
        }
    }
}