/** * Copyright 2012 Andy Turner, The University of Leeds, UK * * Redistribution and use of this software in source and binary forms, with or * without modification is permitted. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package uk.ac.leeds.ccg.andyt.projects.geog3600.fiona; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.StreamTokenizer; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.TreeMap; import java.util.TreeSet; /** * A class for aggregating Output Area Classification data for Scotland. This * class was originally developed for Fiona Mackenzie for an undergraduate * Geography dissertation at the University of Leeds in 2012. The class is self * contained and relies only on the core Java language. (The program does not * rely on any other libraries to run). The program was developed to use 3 look * up tables which were made available by the EDINA UKBorders Service. These * data provide codes and names for producing aggregate outputs for Standard * Wards in Scotland which took into account output area classifications as * produced by Dan Vickers. The output file generated is (like the input files) * an ASCII comma separated version file. This file is output in a file named * "AggregatedData.csv" with a header line. The six fields as per the header * line are: STWardCode,"STWardName","Council Area Name",NumberOfOutputAreas, * TotalPopulation,TotalProsperingSuburbPopulation". */ public class AreaClassificationAggregator { /** * A name for exception and error handling */ private static final String className = "AreaClassificationAggregator"; /** * File directory from which data is read and written to. */ private File directory; /** * Creates a new instance of this class using directory. * * @param directory */ public AreaClassificationAggregator(File directory) { this.directory = directory; } /** * This is the main method where the program begins execution. One or no * arguments are expected. A single argument gives the file location of the * input data. Without this argument it is assumed that the program is run * from the location of the input data. Output files are written to this * location. The input data are expected in 4 files, 3 of which are expected * in containing directories as follows: * "STANDARD_WARD_LUT/STANDARD_WARD_LUT.csv"; * "COUNCIL_AREA_LUT/COUNCIL_AREA_LUT.csv"; * "OA_TO_HIGHER_AREAS_LUT/OA_TO_HIGHER_AREAS_LUT.csv". These look up table * files were downloaded from the EDINA UKBorders Service * (http://edina.ac.uk/ukborders/). The other file is expected to be named * "scotland- output area OAC dan vickers data.csv", this file was converted * to csv from data made available via Dan Vickers 2001 Census Output Area * Classification Web Pages * (http://www.sasi.group.shef.ac.uk/area_classification/). This * classification file contains population data and area classifications at * Output Area level for Scotland as derived from the from the 2001 UK * Population census. * * @param args the command line arguments */ public static void main(String[] args) { // Check args File directory = null; if (args.length == 0) { //directory = new File(System.getProperty("user.dir")); directory = new File("/scratch01/Work/geog3600/Fiona/"); } else { if (args.length == 1) { directory = new File(args[0]); } } if (directory.exists() == false) { System.err.println( "Directory " + directory + " does not exist. Program exiting."); System.exit(2); } else { File[] files = directory.listFiles(); HashSet files_HashSet = new HashSet(); files_HashSet.addAll(Arrays.asList(files)); String inputFilename; inputFilename = "STANDARD_WARD_LUT"; checkForInputFile(files_HashSet, directory, inputFilename); inputFilename = "COUNCIL_AREA_LUT"; checkForInputFile(files_HashSet, directory, inputFilename); inputFilename = "OA_TO_HIGHER_AREAS_LUT"; checkForInputFile(files_HashSet, directory, inputFilename); } new AreaClassificationAggregator(directory).run(); } /** * Checks that the Look up table data file given by directory and filename * exist and exits the program in error if it does not. * * @param files_HashSet * @param directory * @param filename */ private static void checkForInputFile( HashSet files_HashSet, File directory, String filename) { File file = new File( directory, filename); if (!files_HashSet.contains(file)) { System.err.println( "Directory " + directory + " does not contain directory " + filename + " in " + className + ".checkForInputFile(HashSet,File). " + "Program exiting."); System.exit(2); } else { filename += ".csv"; File csvFile = new File( file, filename); if (!csvFile.exists()) { System.err.println( "File " + csvFile + " does not exist in " + className + ".checkForInputFile(HashSet,File). " + "Program exiting."); System.exit(2); } } } public void run() { String filename; File file; Iterator ite; // Read STANDARD_WARD_LUT into a map for linking filename = "STANDARD_WARD_LUT"; file = getNestedFile(filename); HashMap standardWardLUT = readStandardWardLUT_File(file); // Read COUNCIL_AREA_LUT into a map for linking filename = "COUNCIL_AREA_LUT"; file = getNestedFile(filename); HashMap councilAreaLUT = readCouncilAreaLUT_File(file); // Read OA_TO_HIGHER_AREAS_LUT into a map for linking filename = "OA_TO_HIGHER_AREAS_LUT"; file = getNestedFile(filename); HashMap OA_TO_standardWardLUT = readOA_TO_HIGHER_AREAS_LUT_File(file); // Read AreaClassification data into maps for linking filename = "scotland- output area OAC dan vickers data.csv"; file = new File( directory, filename); Object[] classificationData = readOutputAreaClassification(file); HashMap classification = (HashMap) classificationData[0]; TreeSet areaTypeACSuperGroup = (TreeSet) classificationData[1]; TreeSet areaTypeACGroup = (TreeSet) classificationData[2]; TreeSet areaTypeACSubGroup = (TreeSet) classificationData[3]; // Initialise: areaTypeACSuperGroupStandarWardPopulations_TreeMap; // areaTypeACGroupStandarWardPopulations_TreeMap; and, // areaTypeACSubGroupStandarWardPopulations_TreeMap used to store the // aggregated data for each Standard Ward and for each area type in the // respective classification. TreeMap> areaTypeACSuperGroupStandarWardPopulations_TreeMap = new TreeMap>(); TreeMap> areaTypeACGroupStandarWardPopulations_TreeMap = new TreeMap>(); TreeMap> areaTypeACSubGroupStandarWardPopulations_TreeMap = new TreeMap>(); ite = standardWardLUT.keySet().iterator(); Iterator ite2; while (ite.hasNext()) { String standardWard = ite.next(); TreeMap areaTypeACSuperGroupStandarWardPopulation_TreeMap = new TreeMap(); TreeMap areaTypeACGroupStandarWardPopulation_TreeMap = new TreeMap(); TreeMap areaTypeACSubGroupStandarWardPopulation_TreeMap = new TreeMap(); // SuperGroup ite2 = areaTypeACSuperGroup.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); areaTypeACSuperGroupStandarWardPopulation_TreeMap.put(areaType, 0L); } areaTypeACSuperGroupStandarWardPopulations_TreeMap.put( standardWard, areaTypeACSuperGroupStandarWardPopulation_TreeMap); // Group ite2 = areaTypeACGroup.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); areaTypeACGroupStandarWardPopulation_TreeMap.put(areaType, 0L); } areaTypeACGroupStandarWardPopulations_TreeMap.put( standardWard, areaTypeACGroupStandarWardPopulation_TreeMap); // SubGroup ite2 = areaTypeACSubGroup.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); areaTypeACSubGroupStandarWardPopulation_TreeMap.put(areaType, 0L); } areaTypeACSubGroupStandarWardPopulations_TreeMap.put( standardWard, areaTypeACSubGroupStandarWardPopulation_TreeMap); } // Initialise totalOACount, totalPopulationCount, // areaTypeACGroupPopulationCounts, areaTypeACSuperGroupPopulationCounts HashMap totalOACount = new HashMap(); HashMap totalPopulationCount = new HashMap(); HashMap> areaTypeACSuperGroupPopulationCounts = new HashMap>(); HashMap> areaTypeACGroupPopulationCounts = new HashMap>(); HashMap> areaTypeACSubGroupPopulationCounts = new HashMap>(); // Calculate and add statistics to statistical maps ite = classification.keySet().iterator(); while (ite.hasNext()) { String aOACode = ite.next(); String aSTWardCode = OA_TO_standardWardLUT.get(aOACode); // System.out.println( // "aOACode " + aOACode + // " aSTWardCode " + aSTWardCode); String[] classificationObject = (String[]) classification.get(aOACode); long totalOACounter = 1; long totalPop = Long.valueOf(classificationObject[3]); // SuperGroup TreeMap areaTypeACSuperGroupStandarWardPopulation_TreeMap = areaTypeACSuperGroupStandarWardPopulations_TreeMap.get(aSTWardCode); ite2 = areaTypeACSuperGroup.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); if (classificationObject[0].equalsIgnoreCase(areaType)) { long pop = areaTypeACSuperGroupStandarWardPopulation_TreeMap.get( classificationObject[0]); pop += totalPop; areaTypeACSuperGroupStandarWardPopulation_TreeMap.put( classificationObject[0], pop); } } areaTypeACSuperGroupPopulationCounts.put( aSTWardCode, areaTypeACSuperGroupStandarWardPopulation_TreeMap); // Group TreeMap areaTypeACGroupStandarWardPopulation_TreeMap = areaTypeACGroupStandarWardPopulations_TreeMap.get(aSTWardCode); ite2 = areaTypeACGroup.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); if (classificationObject[1].equalsIgnoreCase(areaType)) { long pop = areaTypeACGroupStandarWardPopulation_TreeMap.get( classificationObject[1]); pop += totalPop; areaTypeACGroupStandarWardPopulation_TreeMap.put( classificationObject[1], pop); } } areaTypeACGroupPopulationCounts.put( aSTWardCode, areaTypeACGroupStandarWardPopulation_TreeMap); // SubGroup TreeMap areaTypeACSubGroupStandarWardPopulation_TreeMap = areaTypeACSubGroupStandarWardPopulations_TreeMap.get(aSTWardCode); ite2 = areaTypeACSubGroup.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); if (classificationObject[2].equalsIgnoreCase(areaType)) { long pop = areaTypeACSubGroupStandarWardPopulation_TreeMap.get( classificationObject[2]); pop += totalPop; areaTypeACSubGroupStandarWardPopulation_TreeMap.put( classificationObject[2], pop); } } areaTypeACSubGroupPopulationCounts.put( aSTWardCode, areaTypeACSubGroupStandarWardPopulation_TreeMap); // Total if (totalPopulationCount.containsKey(aSTWardCode)) { totalOACounter += totalOACount.get(aSTWardCode); totalPop += totalPopulationCount.get(aSTWardCode); } totalOACount.put(aSTWardCode, totalOACounter); totalPopulationCount.put(aSTWardCode, totalPop); } // Write output filename = "ACSuperGroupAggregatedData.csv"; writeOutput( filename, totalPopulationCount, totalOACount, areaTypeACSuperGroup, areaTypeACSuperGroupPopulationCounts, standardWardLUT, councilAreaLUT, OA_TO_standardWardLUT); filename = "ACGroupAggregatedData.csv"; writeOutput( filename, totalPopulationCount, totalOACount, areaTypeACGroup, areaTypeACGroupPopulationCounts, standardWardLUT, councilAreaLUT, OA_TO_standardWardLUT); filename = "ACSubGroupAggregatedData.csv"; writeOutput( filename, totalPopulationCount, totalOACount, areaTypeACSubGroup, areaTypeACSubGroupPopulationCounts, standardWardLUT, councilAreaLUT, OA_TO_standardWardLUT); } /** * Writes output to csv file. * * @param filename * @param totalPopulationCount * @param totalOACount * @param areaType_TreeSet * @param areaTypePopulationCounts * @param standardWardLUT * @param councilAreaLUT * @param OA_TO_standardWardLUTstandardWardLUT */ public void writeOutput( String filename, HashMap totalPopulationCount, HashMap totalOACount, TreeSet areaType_TreeSet, HashMap> areaTypePopulationCounts, HashMap standardWardLUT, HashMap councilAreaLUT, HashMap OA_TO_standardWardLUTstandardWardLUT) { File outputFile; PrintWriter pw; String line; outputFile = new File( directory, filename); pw = null; try { pw = new PrintWriter(outputFile); } catch (IOException aIOException) { System.err.println(aIOException.getMessage() + " in " + this.getClass().getName() + ".run()"); System.exit(2); } line = "STWardCode,\"STWardName\",\"Council Area Name\"," + "NumberOfOutputAreas,TotalPopulation"; Iterator ite = areaType_TreeSet.iterator(); while (ite.hasNext()) { String areaType = ite.next(); line += "," + areaType; } System.out.println(line); pw.println(line); ite = totalPopulationCount.keySet().iterator(); while (ite.hasNext()) { String aSTWardCode = ite.next(); //String aSTWardName = "aSTWardName"; String aSTWardName = ((String[]) standardWardLUT.get(aSTWardCode))[0]; String aCouncilAreaCode = ((String[]) standardWardLUT.get(aSTWardCode))[1]; String aCouncilAreaName = ((String[]) councilAreaLUT.get(aCouncilAreaCode))[0]; long totalOACounter = totalOACount.get(aSTWardCode); long totalPop = totalPopulationCount.get(aSTWardCode); line = aSTWardCode + "," + "\"" + aSTWardName + "\"," + "\"" + aCouncilAreaName + "\"," + totalOACounter + "," + totalPop; Iterator ite2 = areaType_TreeSet.iterator(); while (ite2.hasNext()) { String areaType = ite2.next(); line += "," + areaTypePopulationCounts.get(aSTWardCode).get(areaType); } System.out.println(line); pw.println(line); } pw.close(); } public File getNestedFile(String filename) { File file = new File( directory, filename); return new File( file, filename + ".csv"); } public HashMap readStandardWardLUT_File(File file) { HashMap result = new HashMap(); try { StreamTokenizer aStreamTokenizer = getStreamTokeniser(file); String line = ""; //Skip the first line int tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOL) { tokenType = aStreamTokenizer.nextToken(); } tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOF) { switch (tokenType) { case StreamTokenizer.TT_EOL: //System.out.println(line); String[] fields = line.split("\",\""); // if (fields.length < 3) { // int debug = 1; // } // System.out.println( // fields[0].substring(1) + ", " + // fields[1] + ", " + fields[2]); String key = fields[1]; String[] value = new String[2]; value[0] = fields[2]; value[1] = fields[0].substring(1); result.put(key, value); break; case StreamTokenizer.TT_WORD: line = aStreamTokenizer.sval; break; } tokenType = aStreamTokenizer.nextToken(); } } catch (IOException aIOException) { System.err.println(aIOException.getMessage() + " in " + this.getClass().getName() + ".readStandardWardLUT_File(File)"); System.exit(2); } return result; } public HashMap readCouncilAreaLUT_File(File file) { HashMap result = new HashMap(); try { StreamTokenizer aStreamTokenizer = getStreamTokeniser(file); String line = ""; //Skip the first line int tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOL) { tokenType = aStreamTokenizer.nextToken(); } tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOF) { switch (tokenType) { case StreamTokenizer.TT_EOL: //System.out.println(line); String[] fields = line.split("\",\""); if (fields.length < 3) { int debug = 1; } // System.out.println( // fields[0].substring(1) + ", " + // fields[1] + ", " + fields[2].substring(0,2)); String key = fields[0].substring(1); String[] value = new String[2]; value[0] = fields[1]; value[1] = fields[2].substring(0, 2); result.put(key, value); break; case StreamTokenizer.TT_WORD: line = aStreamTokenizer.sval; break; } tokenType = aStreamTokenizer.nextToken(); } } catch (IOException aIOException) { System.err.println(aIOException.getMessage() + " in " + this.getClass().getName() + ".readCouncilAreaLUT_File(File)"); System.exit(2); } return result; } public HashMap readOA_TO_HIGHER_AREAS_LUT_File(File file) { HashMap result = new HashMap(); try { StreamTokenizer aStreamTokenizer = getStreamTokeniser(file); String line = ""; //Skip the first line int tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOL) { tokenType = aStreamTokenizer.nextToken(); } tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOF) { switch (tokenType) { case StreamTokenizer.TT_EOL: //System.out.println(line); String[] fields = line.split(","); // if (fields.length < 20) { // int debug = 1; // } // System.out.println( // fields[8].subSequence(1,3) + ", " + // fields[9].substring(1,6) + ", " + // fields[25].substring(1,11)); String key = fields[25].substring(1, 11); String value = fields[9].substring(1, 6); result.put(key, value); break; case StreamTokenizer.TT_WORD: line = aStreamTokenizer.sval; break; } tokenType = aStreamTokenizer.nextToken(); } } catch (IOException aIOException) { System.err.println(aIOException.getMessage() + " in " + this.getClass().getName() + ".readOA_TO_HIGHER_AREAS_LUT_File(File)"); System.exit(2); } return result; } public Object[] readOutputAreaClassification(File file) { Object[] result = new Object[4]; TreeSet areaTypeACSuperGroup = new TreeSet(); TreeSet areaTypeACGroup = new TreeSet(); TreeSet areaTypeACSubGroup = new TreeSet(); HashMap oaPopulation_HashMap = new HashMap(); //HashMap oaMap_HashMap = new HashMap(); try { StreamTokenizer aStreamTokenizer = getStreamTokeniser(file); String line = ""; //Skip the first line int tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOL) { tokenType = aStreamTokenizer.nextToken(); } tokenType = aStreamTokenizer.nextToken(); while (tokenType != StreamTokenizer.TT_EOF) { switch (tokenType) { case StreamTokenizer.TT_EOL: //System.out.println(line); String[] fields = line.split(","); // if (fields.length < 3) { // int debug = 1; // } String key = fields[0]; String[] value = new String[4]; value[0] = fields[2]; areaTypeACSuperGroup.add(fields[2]); areaTypeACGroup.add(fields[4]); areaTypeACSubGroup.add(fields[6]); value[1] = fields[4]; value[2] = fields[6]; value[3] = fields[7]; oaPopulation_HashMap.put(key, value); // System.out.println( // key + ", " + value[0] + ", " + value[1]); break; case StreamTokenizer.TT_WORD: line = aStreamTokenizer.sval; break; } tokenType = aStreamTokenizer.nextToken(); } } catch (IOException aIOException) { System.err.println(aIOException.getMessage() + " in " + this.getClass().getName() + ".readOutputAreaClassification(File)"); System.exit(2); } result[0] = oaPopulation_HashMap; result[1] = areaTypeACSuperGroup; result[2] = areaTypeACGroup; result[3] = areaTypeACSubGroup; return result; } private StreamTokenizer getStreamTokeniser(File file) { StreamTokenizer aStreamTokenizer = null; try { BufferedReader aBufferedReader = new BufferedReader( new InputStreamReader( new FileInputStream(file))); aStreamTokenizer = new StreamTokenizer(aBufferedReader); aStreamTokenizer.resetSyntax(); aStreamTokenizer.wordChars(',', ','); aStreamTokenizer.wordChars('"', '"'); aStreamTokenizer.wordChars('\'', '\''); aStreamTokenizer.wordChars('&', '&'); aStreamTokenizer.wordChars(';', ';'); aStreamTokenizer.wordChars('(', '('); aStreamTokenizer.wordChars(')', ')'); aStreamTokenizer.wordChars('0', '0'); aStreamTokenizer.wordChars('1', '1'); aStreamTokenizer.wordChars('2', '2'); aStreamTokenizer.wordChars('3', '3'); aStreamTokenizer.wordChars('4', '4'); aStreamTokenizer.wordChars('5', '5'); aStreamTokenizer.wordChars('6', '6'); aStreamTokenizer.wordChars('7', '7'); aStreamTokenizer.wordChars('8', '8'); aStreamTokenizer.wordChars('9', '9'); aStreamTokenizer.wordChars('.', '.'); aStreamTokenizer.wordChars('-', '-'); aStreamTokenizer.wordChars('+', '+'); aStreamTokenizer.wordChars('a', 'z'); aStreamTokenizer.wordChars('A', 'Z'); aStreamTokenizer.wordChars('\t', '\t'); aStreamTokenizer.wordChars(' ', ' '); aStreamTokenizer.wordChars('_', '_'); String s = "/"; char c = s.charAt(0); int c_int = (int) c; //System.out.println("s " + s + " c " + c + " c_int " + c_int) ; aStreamTokenizer.wordChars(c_int, c_int); aStreamTokenizer.eolIsSignificant(true); } catch (IOException aIOException) { System.err.println(aIOException.getMessage() + " in " + this.getClass().getName() + ".getStreamTokeniser(File)"); System.exit(2); } return aStreamTokenizer; } }